[GPU] Impls refactoring (#6603)

Vladimir Paramuzov 2021-07-15 12:05:34 +03:00 committed by GitHub
parent 7be2b782ba
commit 788e76722f
121 changed files with 3590 additions and 3851 deletions


@@ -2,11 +2,10 @@
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "cldnn/runtime/engine.hpp"
#include "cldnn/primitives/implementation_desc.hpp"
#include "topology.hpp"
@@ -99,14 +98,6 @@ struct learning_params {
learning_params() : momentum(0.9f), weights_decay(0.0005f) {}
};
/// @brief Description of primitives implementation.
struct implementation_desc {
format::type output_format; ///< Output format.
std::string kernel_name; ///< GPU kernel name.
};
using implementation_forcing_map = std::map<primitive_id, implementation_desc>;
/// @brief Represents user-provided program build option.
struct build_option {
/// @brief Allow primitives fusing during program build (default: false).


@@ -0,0 +1,70 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "cldnn/runtime/tensor.hpp"
#include <map>
#include <ostream>
namespace cldnn {
/// @brief Primitives implementation type.
enum class impl_types : uint8_t {
cpu = 1 << 0,
common = 1 << 1,
ocl = 1 << 2,
any = 0xFF,
};
inline impl_types operator&(impl_types a, impl_types b) {
typedef std::underlying_type<impl_types>::type underlying_type;
return static_cast<impl_types>(static_cast<underlying_type>(a) & static_cast<underlying_type>(b));
}
inline impl_types operator|(impl_types a, impl_types b) {
typedef std::underlying_type<impl_types>::type underlying_type;
return static_cast<impl_types>(static_cast<underlying_type>(a) | static_cast<underlying_type>(b));
}
inline impl_types operator~(impl_types a) {
typedef std::underlying_type<impl_types>::type underlying_type;
return static_cast<impl_types>(~static_cast<underlying_type>(a));
}
inline std::ostream& operator<<(std::ostream& out, const impl_types& impl_type) {
switch (impl_type) {
case impl_types::cpu: out << "cpu"; break;
case impl_types::common: out << "common"; break;
case impl_types::ocl: out << "ocl"; break;
case impl_types::any: out << "any"; break;
default: out << "unknown"; break;
}
return out;
}
/// @brief Description of primitives implementation.
struct implementation_desc {
format::type output_format; ///< Output format.
std::string kernel_name; ///< GPU kernel name.
impl_types impl_type; ///< GPU implementation type.
implementation_desc() :
output_format(format::any),
kernel_name(""),
impl_type(impl_types::any) {}
implementation_desc(format::type output_format,
std::string kernel_name,
impl_types impl_type = impl_types::any) :
output_format(output_format),
kernel_name(kernel_name),
impl_type(impl_type) {}
};
using implementation_forcing_map = std::map<primitive_id, implementation_desc>;
} // namespace cldnn
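For context, a minimal usage sketch of the new header (illustrative, not part of the commit): it combines impl_types masks with the bitwise operators above and fills an implementation_forcing_map entry. The primitive id and kernel name are placeholders, and passing the map through a build option (e.g. a force_implementations-style option) is an assumption.

// Minimal usage sketch (illustrative, not from the commit).
#include "cldnn/primitives/implementation_desc.hpp"
#include <iostream>

void forcing_example() {
    using namespace cldnn;

    // Bitmask query with the operators defined above: does the preference allow OCL?
    impl_types preferred = impl_types::ocl | impl_types::cpu;
    if ((preferred & impl_types::ocl) == impl_types::ocl)
        std::cout << "preference includes " << impl_types::ocl << std::endl;  // prints "ocl"

    // Force output format, kernel name and impl type for one primitive.
    // "conv1" and the kernel name are placeholders; how the map reaches the
    // program build options is an assumption here.
    implementation_forcing_map forcing;
    forcing["conv1"] = implementation_desc(format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16", impl_types::ocl);
}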


@@ -38,12 +38,15 @@ file(GLOB __CLDNN_Headers__include
"${__CLDNN_Directory__include}/*.hpp"
)
set(__CLDNN_Directory__gpu "${CMAKE_CURRENT_SOURCE_DIR}/gpu")
set(__CLDNN_Label__gpu "gpu")
set(__CLDNN_Directory__impls "${CMAKE_CURRENT_SOURCE_DIR}/impls")
set(__CLDNN_Label__gpu "impls")
file(GLOB __CLDNN_Sources__gpu
"${__CLDNN_Directory__gpu}/*.h"
"${__CLDNN_Directory__gpu}/*.hpp"
"${__CLDNN_Directory__gpu}/*.cpp"
"${__CLDNN_Directory__impls}/common/*.hpp"
"${__CLDNN_Directory__impls}/common/*.cpp"
"${__CLDNN_Directory__impls}/cpu/*.hpp"
"${__CLDNN_Directory__impls}/cpu/*.cpp"
"${__CLDNN_Directory__impls}/ocl/*.hpp"
"${__CLDNN_Directory__impls}/ocl/*.cpp"
)
set(__CLDNN_Directory__cg_cache "${CLDNN__CODEGEN_INCDIR}")
@@ -130,5 +133,5 @@ endif()
# ======================================================================================================
ie_sse42_optimization_flags(sse4_2_flags)
set_source_files_properties(gpu/detection_output_cpu.cpp half.cpp
set_source_files_properties(impls/cpu/detection_output.cpp half.cpp
PROPERTIES COMPILE_FLAGS "${sse4_2_flags}")


@@ -1,126 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "activation_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "activation/activation_kernel_selector.h"
#include "activation/activation_kernel_base.h"
#include "register_gpu.hpp"
namespace cldnn {
namespace gpu {
struct activation_gpu : typed_primitive_gpu_impl<activation> {
using parent = typed_primitive_gpu_impl<activation>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<activation_gpu>(*this);
}
kernel_arguments_data get_arguments(typed_primitive_inst<activation>& instance, int32_t split) const override {
kernel_arguments_data args = parent::get_arguments(instance, split);
if (_outer.is_parameterized()) {
args.slope = instance.slope_memory();
}
return args;
}
static primitive_impl* create(const activation_node& arg) {
auto activation_params = get_default_params<kernel_selector::activation_params>(arg);
auto activation_optional_params =
get_default_optional_params<kernel_selector::activation_optional_params>(arg.get_program());
convert_new_activation_func(arg.get_primitive(), activation_params.activations);
if (arg.is_parameterized()) {
const auto& slope_layout = arg.slope_input().get_output_layout();
const auto& output_layout = arg.get_output_layout();
const auto params_num =
kernel_selector::GetActivationAdditionalParamsNumber(activation_params.activations[0].function);
CLDNN_ERROR_LESS_THAN(arg.id(),
"Slope layout size count",
slope_layout.size.count(),
"output_layout.size.feature[0] * params_num",
static_cast<size_t>(output_layout.size.feature[0] * params_num),
"Error - not enough data inside additional params buffer");
activation_params.inputActivationParams.push_back(convert_data_tensor(slope_layout));
}
auto& kernel_selector = kernel_selector::activation_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(activation_params, activation_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto activation = new activation_gpu(arg, best_kernels[0]);
return activation;
}
};
namespace detail {
attach_activation_gpu::attach_activation_gpu() {
auto val_fw = activation_gpu::create;
implementation_map<activation>::add({
{std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw},
{std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw},
{std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
{std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
{std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw},
{std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw},
{std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), val_fw},
{std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw},
{std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), val_fw},
{std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), val_fw},
{std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw},
{std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), val_fw},
{std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw},
{std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), val_fw},
{std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), val_fw},
// block f16 format
{std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw},
{std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw},
{std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw},
{std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw},
// 3D
{std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw},
{std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw},
{std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw},
{std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw},
{ std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), val_fw },
{ std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), val_fw },
{ std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), val_fw },
{ std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), val_fw },
{ std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), val_fw },
{ std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), val_fw },
{ std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16), val_fw },
{ std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), val_fw },
{ std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), val_fw },
{ std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16), val_fw },
// bfwzyx
{std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw},
{std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw},
{std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw},
{std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw},
{std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw},
// fs_b_yx_fsv32
{std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), val_fw},
});
}
} // namespace detail
} // namespace gpu
} // namespace cldnn
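All of the removed gpu/*.cpp files in this commit follow the pattern above: an attach_*_gpu constructor registers a create function in implementation_map<T> keyed by (engine type, data type, format). A standalone sketch of that idea, with simplified enums and without the clDNN template machinery (names here are illustrative, not the library's API):

// Standalone sketch of the registration idea (simplified; not the clDNN template).
#include <map>
#include <tuple>

enum class engine_kind { ocl };
enum class dtype { f16, f32, i8, u8 };
enum class fmt { yxfb, bfyx, b_fs_yx_fsv16 };

struct node;            // stands in for a typed program node, e.g. activation_node
struct primitive_impl;  // stands in for the clDNN implementation base class

using impl_key = std::tuple<engine_kind, dtype, fmt>;
using impl_factory = primitive_impl* (*)(const node&);

// One registry per primitive type in the real code (implementation_map<activation>, ...).
struct impl_registry {
    static std::map<impl_key, impl_factory>& storage() {
        static std::map<impl_key, impl_factory> m;
        return m;
    }
    static void add(impl_key key, impl_factory factory) { storage()[key] = factory; }
    static impl_factory find(const impl_key& key) { return storage().at(key); }
};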


@@ -1,86 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "average_unpooling_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "average_unpooling/average_unpooling_kernel_selector.h"
#include "average_unpooling/average_unpooling_kernel_base.h"
namespace cldnn {
namespace gpu {
struct average_unpooling_gpu : typed_primitive_gpu_impl<average_unpooling> {
using parent = typed_primitive_gpu_impl<average_unpooling>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<average_unpooling_gpu>(*this);
}
protected:
kernel_arguments_data get_arguments(typed_primitive_inst<average_unpooling>& instance, int32_t split) const override {
kernel_arguments_data args = parent::get_arguments(instance, split);
return args;
}
public:
static primitive_impl* create(const average_unpooling_node& arg) {
auto average_unpooling_params = get_default_params<kernel_selector::average_unpooling_params>(arg);
auto average_unpooling_optional_params =
get_default_optional_params<kernel_selector::average_unpooling_optional_params>(arg.get_program());
auto& params = average_unpooling_params;
auto primitive = arg.get_primitive();
auto stride = primitive->stride;
params.unpoolSize = {
(uint32_t)primitive->size.spatial[0],
(uint32_t)primitive->size.spatial[1],
};
params.unpoolStride = {(uint32_t)stride.spatial[0], (uint32_t)stride.spatial[1]};
auto& kernel_selector = kernel_selector::average_unpooling_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(average_unpooling_params, average_unpooling_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto average_unpool = new average_unpooling_gpu(arg, best_kernels[0]);
return average_unpool;
}
};
namespace detail {
attach_average_unpooling_gpu::attach_average_unpooling_gpu() {
implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
average_unpooling_gpu::create);
implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
average_unpooling_gpu::create);
implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
average_unpooling_gpu::create);
implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
average_unpooling_gpu::create);
implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
average_unpooling_gpu::create);
implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb),
average_unpooling_gpu::create);
implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
average_unpooling_gpu::create);
implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
average_unpooling_gpu::create);
implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf),
average_unpooling_gpu::create);
}
} // namespace detail
} // namespace gpu
} // namespace cldnn


@@ -1,77 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "batch_to_space_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "kernel_selector_helper.h"
#include "batch_to_space/batch_to_space_kernel_selector.h"
#include "batch_to_space/batch_to_space_kernel_ref.h"
#include "cldnn/runtime/error_handler.hpp"
#include "data_inst.h"
#include <vector>
using namespace cldnn;
namespace cldnn {
namespace gpu {
struct batch_to_space_gpu : typed_primitive_gpu_impl<batch_to_space> {
using parent = typed_primitive_gpu_impl<batch_to_space>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<batch_to_space_gpu>(*this);
}
public:
static primitive_impl* create(const batch_to_space_node& arg) {
auto batch_to_space_params = get_default_params<kernel_selector::batch_to_space_params>(arg);
auto batch_to_space_optional_params =
get_default_optional_params<kernel_selector::batch_to_space_optional_params>(arg.get_program());
auto primitive = arg.get_primitive();
batch_to_space_params.block_shape = convert_dim_vector(primitive->block_shape);
batch_to_space_params.crops_begin = convert_dim_vector(primitive->crops_begin);
batch_to_space_params.crops_end = convert_dim_vector(primitive->crops_end);
auto& kernel_selector = kernel_selector::batch_to_space_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(batch_to_space_params, batch_to_space_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto batch_to_space = new batch_to_space_gpu(arg, best_kernels[0]);
return batch_to_space;
}
};
namespace detail {
attach_batch_to_space_gpu::attach_batch_to_space_gpu() {
auto val_fw = batch_to_space_gpu::create;
implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw);
implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw);
implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);
}
} // namespace detail
} // namespace gpu
} // namespace cldnn


@@ -1,100 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "border_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "kernel_selector_helper.h"
#include "border/border_kernel_selector.h"
#include "border/border_kernel_base.h"
#include "cldnn/runtime/error_handler.hpp"
namespace cldnn {
namespace gpu {
struct border_gpu : typed_primitive_gpu_impl<border> {
using parent = typed_primitive_gpu_impl<border>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<border_gpu>(*this);
}
static primitive_impl* create(const border_node& arg) {
auto b_params = get_default_params<kernel_selector::border_params>(arg, 1);
auto b_optional_params =
get_default_optional_params<kernel_selector::border_optional_params>(arg.get_program());
auto desc = arg.get_primitive();
b_params.lt_sizes = convert_dim_vector(desc->left_top_sizes);
b_params.rb_sizes = convert_dim_vector(desc->right_bottom_sizes);
b_params.border_value = desc->border_value;
switch (desc->type) {
case border_type::constant:
b_params.b_type = kernel_selector::border_type::CONSTANT;
break;
case border_type::edge:
b_params.b_type = kernel_selector::border_type::EDGE;
break;
case border_type::mirror:
b_params.b_type = kernel_selector::border_type::MIRROR;
break;
case border_type::mirror_101:
b_params.b_type = kernel_selector::border_type::MIRROR_101;
break;
default:
assert(
false &&
"Encountered unhandled enum case: border_type during translation to kernel selector enumeration.");
}
auto& kernel_selector = kernel_selector::border_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(b_params, b_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
return new border_gpu(arg, best_kernels[0]);
}
};
namespace detail {
attach_border_gpu::attach_border_gpu() {
auto val_fw = border_gpu::create;
implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), val_fw);
implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), val_fw);
implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), val_fw);
implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), val_fw);
implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw);
implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw);
}
} // namespace detail
} // namespace gpu
} // namespace cldnn


@@ -1,163 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "concatenation_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "concatenation/concatenation_kernel_selector.h"
#include "concatenation/concatenation_kernel_base.h"
#include <initializer_list>
namespace cldnn {
namespace gpu {
namespace {
kernel_selector::concat_axis convert_axis(concatenation::concatenation_axis axis) {
switch (axis) {
case concatenation::along_x:
return kernel_selector::concat_axis::X;
case concatenation::along_y:
return kernel_selector::concat_axis::Y;
case concatenation::along_z:
return kernel_selector::concat_axis::Z;
case concatenation::along_w:
return kernel_selector::concat_axis::W;
case concatenation::along_f:
return kernel_selector::concat_axis::FEATURE;
case concatenation::along_b:
return kernel_selector::concat_axis::BATCH;
default:
return kernel_selector::concat_axis::X;
}
}
} // namespace
struct concatenation_gpu : typed_primitive_gpu_impl<concatenation> {
using parent = typed_primitive_gpu_impl<concatenation>;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<concatenation_gpu>(*this);
}
concatenation_gpu(const concatenation_node& arg, const kernel_selector::kernel_data& kd) : parent(arg, kd) {
if (!_outer.can_be_optimized()) {
CLDNN_ERROR_NOT_EQUAL(_outer.id(),
"Input count",
_outer.inputs_count(),
"kds size",
kd.kernels.size(),
"Error - not enough kernels for concatenation");
}
}
protected:
bool optimized_out(concatenation_inst& instance) const override {
return parent::optimized_out(instance) || _outer.can_be_optimized();
}
public:
static primitive_impl* create(const concatenation_node& arg) {
if (arg.can_be_optimized()) {
return new concatenation_gpu(arg, {});
}
auto concat_params = get_default_params<kernel_selector::concatenation_params>(arg);
auto concat_optional_params =
get_default_optional_params<kernel_selector::concatenation_optional_params>(arg.get_program());
auto axis = arg.get_primitive()->axis;
concat_params.inputs.resize(arg.inputs_count());
for (size_t i = 0; i < arg.inputs_count(); ++i) {
const layout& input_layout = arg.input(i).get_output_layout();
concat_params.inputs[i] = convert_data_tensor(input_layout);
}
concat_params.axis = convert_axis(axis);
concat_optional_params.kernelPerInput = true;
auto& kernel_selector = kernel_selector::concatenation_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(concat_params, concat_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
concatenation_gpu* concat = new concatenation_gpu(arg, best_kernels[0]);
return concat;
}
};
namespace detail {
attach_concatenation_gpu::attach_concatenation_gpu() {
implementation_map<concatenation>::add({
{std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::i64, format::yxfb), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::i64, format::byxf), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f32, format::fyxb), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f16, format::fyxb), concatenation_gpu::create},
// 5D
{ std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), concatenation_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), concatenation_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), concatenation_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), concatenation_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), concatenation_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), concatenation_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), concatenation_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), concatenation_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), concatenation_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), concatenation_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_zyx_fsv16), concatenation_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i64, format::b_fs_zyx_fsv16), concatenation_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), concatenation_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), concatenation_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16), concatenation_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_zyx_bsv16_fsv16), concatenation_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i32, format::bs_fs_zyx_bsv16_fsv16), concatenation_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i64, format::bs_fs_zyx_bsv16_fsv16), concatenation_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), concatenation_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), concatenation_gpu::create },
// block f16 format
{std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), concatenation_gpu::create},
// MMAD
{std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv32), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv32), concatenation_gpu::create},
// 6D
{std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::i64, format::bfwzyx), concatenation_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), concatenation_gpu::create},
});
}
} // namespace detail
} // namespace gpu
} // namespace cldnn


@@ -1,127 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "crop_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "kernel_selector_helper.h"
#include "eltwise/eltwise_kernel_selector.h"
#include "eltwise/eltwise_kernel_base.h"
#include "cldnn/runtime/error_handler.hpp"
namespace cldnn {
namespace gpu {
struct crop_gpu : typed_primitive_gpu_impl<crop> {
using parent = typed_primitive_gpu_impl<crop>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<crop_gpu>(*this);
}
protected:
bool optimized_out(crop_inst& instance) const override {
return parent::optimized_out(instance) || _outer.can_be_optimized();
}
public:
static primitive_impl* create(const crop_node& arg) {
auto ew_params = get_default_params<kernel_selector::eltwise_params>(arg, 1);
auto ew_optional_params =
get_default_optional_params<kernel_selector::eltwise_optional_params>(arg.get_program());
ew_params.operations.push_back(
{{kernel_selector::eltwise_params::InputType::Buffer(0)}, kernel_selector::eltwise_mode::ASSIGN});
const auto& input_layout = arg.input().get_output_layout();
ew_params.inputs[0] = convert_data_tensor(input_layout, 1, arg.get_primitive()->offsets);
auto& kernel_selector = kernel_selector::eltwise_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(ew_params, ew_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto crop = new crop_gpu(arg, best_kernels[0]);
return crop;
}
};
namespace detail {
attach_crop_gpu::attach_crop_gpu() {
auto val_fw = crop_gpu::create;
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::yxfb), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::byxf), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::fyxb), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fyxb), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::fyxb), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::fyxb), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::fyxb), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::fyxb), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfwzyx), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::b_fs_zyx_fsv16), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_zyx_fsv16), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bs_fs_zyx_bsv16_fsv16), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bs_fs_zyx_bsv16_fsv16), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_zyx_bsv16_fsv16), val_fw);
}
} // namespace detail
} // namespace gpu
} // namespace cldnn


@@ -1,173 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "deconvolution_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "deconvolution/deconvolution_kernel_selector.h"
#include "deconvolution/deconvolution_kernel_base.h"
#include <algorithm>
namespace cldnn {
namespace gpu {
struct deconvolution_gpu : typed_primitive_gpu_impl<deconvolution> {
using parent = typed_primitive_gpu_impl<deconvolution>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<deconvolution_gpu>(*this);
}
protected:
// TODO: share it with convolution and fully connected
bool validate_impl(const typed_primitive_inst<deconvolution>&) const override {
bool res = true;
CLDNN_ERROR_NOT_EQUAL(_outer.id(),
"deconvolution filling value",
_outer.get_output_layout().data_padding.filling_value(),
"padding mode",
0.0f,
"Unknown padding mode in deconvolution.");
return res;
}
kernel_arguments_data get_arguments(typed_primitive_inst<deconvolution>& instance, int32_t split) const override {
kernel_arguments_data args = parent::get_arguments(instance, split);
args.weights = instance.weights_memory(split);
args.bias = instance.bias_term() ? instance.bias_memory(split) : nullptr;
return args;
}
int32_t get_split() const override { return _outer.get_split(); }
uint32_t get_groups() const override { return _outer.get_groups(); }
public:
static primitive_impl* create(const deconvolution_node& arg) {
const auto& primitive = arg.get_primitive();
const auto& weights_layout = arg.weights(0).get_output_layout();
const auto& weights_size = weights_layout.size;
const auto& split = primitive->split();
const auto& stride = primitive->stride;
#if 0 // TODO: support dilation
const auto& dilation = primitive->dilation;
#else
const tensor dilation = {0, 0, 1, 1, 1};
#endif
const auto actual_split = split;
const auto& input_offset = primitive->input_offset;
const auto& groups = primitive->groups;
auto deconv_params = get_weights_bias_default_params<kernel_selector::deconvolution_params>(
arg,
(groups > 1) ? 1 : actual_split,
1,
primitive->grouped_weights_shape);
auto deconv_optional_params =
get_default_weights_bias_optional_params<kernel_selector::deconvolution_optional_params>(arg.get_program());
deconv_params.split = split;
deconv_params.groups = groups;
auto spatial_size = arg.get_output_layout().format.dimension() - 2;
uint32_t kx = weights_size.spatial[0];
uint32_t ky = weights_size.spatial[1];
uint32_t kz = spatial_size == 2 ? 1 : weights_size.spatial[2];
deconv_params.filterSize = { kx, ky, kz };
deconv_params.padding = {(uint32_t)std::max(-input_offset.spatial[0], 0),
(uint32_t)std::max(-input_offset.spatial[1], 0),
(uint32_t)std::max(-input_offset.spatial[2], 0)};
deconv_params.stride = {(uint32_t)stride.spatial[0], (uint32_t)stride.spatial[1], (uint32_t)stride.spatial[2]};
deconv_params.dilation = {(uint32_t)dilation.spatial[0],
(uint32_t)dilation.spatial[1],
(uint32_t)dilation.spatial[2]};
auto& kernel_selector = kernel_selector::deconvolution_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(deconv_params, deconv_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with these arguments");
auto deconv = new deconvolution_gpu(arg, best_kernels[0]);
return deconv;
}
};
namespace detail {
attach_deconvolution_gpu::attach_deconvolution_gpu() {
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_yx_bsv16_fsv16),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_zyx_bsv16_fsv16),
deconvolution_gpu::create);
}
} // namespace detail
} // namespace gpu
} // namespace cldnn


@@ -1,70 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "depth_to_space_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "kernel_selector_helper.h"
#include "depth_to_space/depth_to_space_kernel_selector.h"
#include "depth_to_space/depth_to_space_kernel_ref.h"
#include "cldnn/runtime/error_handler.hpp"
#include "common_types.h"
using namespace cldnn;
namespace cldnn {
namespace gpu {
struct depth_to_space_gpu : typed_primitive_gpu_impl<depth_to_space> {
using parent = typed_primitive_gpu_impl<depth_to_space>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<depth_to_space_gpu>(*this);
}
public:
static primitive_impl* create(const depth_to_space_node& arg) {
auto depth_to_space_params = get_default_params<kernel_selector::depth_to_space_params>(arg);
auto depth_to_space_optional_params =
get_default_optional_params<kernel_selector::depth_to_space_optional_params>(arg.get_program());
depth_to_space_params.block_size = arg.get_primitive()->block_size;
depth_to_space_params.mode = arg.get_primitive()->mode == depth_to_space_mode::blocks_first ? kernel_selector::depth_to_space_mode::BLOCKS_FIRST
: kernel_selector::depth_to_space_mode::DEPTH_FIRST;
auto& kernel_selector = kernel_selector::depth_to_space_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(depth_to_space_params, depth_to_space_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto depth_to_space = new depth_to_space_gpu(arg, best_kernels[0]);
return depth_to_space;
}
};
namespace detail {
attach_depth_to_space_gpu::attach_depth_to_space_gpu() {
auto val_fw = depth_to_space_gpu::create;
implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);
}
} // namespace detail
} // namespace gpu
} // namespace cldnn


@@ -1,195 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "eltwise_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "eltwise/eltwise_kernel_selector.h"
#include "eltwise/eltwise_kernel_base.h"
#include <vector>
namespace cldnn {
namespace gpu {
struct eltwise_gpu : typed_primitive_gpu_impl<eltwise> {
using parent = typed_primitive_gpu_impl<eltwise>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<eltwise_gpu>(*this);
}
protected:
kernel_arguments_data get_arguments(typed_primitive_inst<eltwise>& instance, int32_t split) const override {
kernel_arguments_data args = parent::get_arguments(instance, split);
return args;
}
public:
static primitive_impl* create(const eltwise_node& arg) {
auto ew_params = get_default_params<kernel_selector::eltwise_params>(arg);
auto ew_optional_params =
get_default_optional_params<kernel_selector::eltwise_optional_params>(arg.get_program());
for (size_t i = 1; i < arg.inputs_count(); i++) {
ew_params.inputs.push_back(convert_data_tensor(arg.input(i).get_output_layout()));
}
const auto& primitive = arg.get_primitive();
ew_params.operations.push_back({{kernel_selector::eltwise_params::InputType::Buffer(0),
kernel_selector::eltwise_params::InputType::Buffer(1)},
convert_to_eltwise_mode(primitive->mode)});
for (uint32_t i = 2; i < static_cast<uint32_t>(arg.inputs_count()); i++) {
ew_params.operations.push_back({{kernel_selector::eltwise_params::InputType::Intermediate(i - 2),
kernel_selector::eltwise_params::InputType::Buffer(i)},
convert_to_eltwise_mode(primitive->mode)});
}
if (primitive->mode == eltwise_mode::sum) {
ew_params.coefficients = primitive->coefficients;
}
for (size_t i = 0; i < ew_params.inputs.size(); i++) {
if (!ew_params.inputs[i].SameDims(ew_params.output)) {
std::vector<int32_t> input_size = arg.input(i).get_output_layout().size.raw.vector();
std::vector<int32_t> output_size = arg.get_output_layout().size.raw.vector();
bool broadcast = false;
for (size_t d = 0; d < output_size.size(); d++) {
if (output_size[d] != 1 && input_size[d] == 1)
broadcast = true;
}
if (broadcast) {
ew_params.broadcast = true;
break;
} else {
ew_params.layoutBased = true;
break;
}
}
}
// stride
if (!primitive->stride.empty()) {
const auto& stride = primitive->stride;
ew_params.stride.resize(stride.size());
for (size_t i = 0; i < primitive->stride.size(); i++) {
ew_params.stride[i] = {(uint32_t)stride[i].spatial[0],
(uint32_t)stride[i].spatial[1],
(uint32_t)stride[i].spatial[2]};
}
}
// check if strides are the same
if (!ew_params.stride.empty()) {
const auto& stride = ew_params.stride[0];
for (size_t i = 1; i < ew_params.stride.size(); i++) {
if (stride.x != ew_params.stride[i].x || stride.y != ew_params.stride[i].y)
ew_params.layoutBased = true;
}
} else if (!ew_params.inputs[0].SameDimsSizes(ew_params.inputs[1])) {
ew_params.broadcast = true;
}
// TODO [LOW PRECISION]: check if this parameter's really needed. Maybe data types are enough
bool quantization = true;
for (size_t i = 0; i < arg.inputs_count(); i++) {
if (arg.input(i).get_output_layout().data_type != data_types::u8 &&
arg.input(i).get_output_layout().data_type != data_types::i8) {
quantization = false;
}
}
ew_params.int8_quantization = quantization;
auto& kernel_selector = kernel_selector::eltwise_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(ew_params, ew_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto eltwise = new eltwise_gpu(arg, best_kernels[0]);
return eltwise;
}
};
namespace detail {
attach_eltwise_gpu::attach_eltwise_gpu() {
implementation_map<eltwise>::add(
{{ std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i64, format::yxfb), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i64, format::byxf), eltwise_gpu::create },
// block f16
{ std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), eltwise_gpu::create },
// 3D
{ std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), eltwise_gpu::create },
// 4D
{ std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i64, format::bfwzyx), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_zyx_fsv16), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i64, format::b_fs_zyx_fsv16), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i32, format::bs_fs_zyx_bsv16_fsv16), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i64, format::bs_fs_zyx_bsv16_fsv16), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), eltwise_gpu::create },
// MMAD
{ std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv32), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv32), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv32), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv32), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv32), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv32), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv32), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv32), eltwise_gpu::create },
//
{ std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), eltwise_gpu::create }});
}
} // namespace detail
} // namespace gpu
} // namespace cldnn
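For orientation, the block above registers one eltwise factory per (engine type, data type, format) triple. A minimal sketch of what such a registration table boils down to, assuming a plain std::map keyed by the same triple; the enum names, eltwise_impl_key and pick_impl below are illustrative stand-ins, not the actual implementation_map API:

#include <map>
#include <tuple>

// hypothetical stand-ins for the enums used as map keys in the registrations above
enum class engine_kind { ocl };
enum class data_kind { f16, f32, i8, u8, i32, i64 };
enum class format_kind { bfyx, byxf, b_fs_yx_fsv16 };

struct primitive_impl {};
struct eltwise_node {};
using factory_fn = primitive_impl* (*)(const eltwise_node&);
using eltwise_impl_key = std::tuple<engine_kind, data_kind, format_kind>;

primitive_impl* create_eltwise(const eltwise_node&) { return new primitive_impl(); }

// registration side: mirrors implementation_map<eltwise>::add(std::make_tuple(...), create)
static const std::map<eltwise_impl_key, factory_fn> eltwise_impls = {
    { std::make_tuple(engine_kind::ocl, data_kind::f32, format_kind::bfyx), create_eltwise },
    { std::make_tuple(engine_kind::ocl, data_kind::f16, format_kind::b_fs_yx_fsv16), create_eltwise },
};

// lookup side: a factory is picked from the node's data type and layout format
primitive_impl* pick_impl(const eltwise_node& node, data_kind dt, format_kind fmt) {
    auto it = eltwise_impls.find(std::make_tuple(engine_kind::ocl, dt, fmt));
    return it == eltwise_impls.end() ? nullptr : it->second(node);
}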

View File

@ -1,68 +0,0 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "gather_nd_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "kernel_selector_helper.h"
#include "gather/gather_nd_kernel_selector.h"
#include "gather/gather_nd_kernel_ref.h"
using namespace cldnn;
namespace cldnn {
namespace gpu {
struct gather_nd_gpu : typed_primitive_gpu_impl<gather_nd> {
using parent = typed_primitive_gpu_impl<gather_nd>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<gather_nd_gpu>(*this);
}
static primitive_impl* create(const gather_nd_node& arg) {
auto gather_nd_params = get_default_params<kernel_selector::gather_nd_params>(arg);
auto gather_nd_optional_params =
get_default_optional_params<kernel_selector::gather_nd_optional_params>(arg.get_program());
gather_nd_params.indices_rank = arg.get_primitive()->indices_rank;
gather_nd_params.batch_dims = arg.get_primitive()->batch_dims;
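// input(1) holds the indices tensor; its layout is appended to the kernel inputs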
gather_nd_params.inputs.push_back(convert_data_tensor(arg.input(1).get_output_layout()));
auto& kernel_selector = kernel_selector::gather_nd_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(gather_nd_params, gather_nd_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto gather_nd = new gather_nd_gpu(arg, best_kernels[0]);
return gather_nd;
}
};
namespace detail {
attach_gather_nd_gpu::attach_gather_nd_gpu() {
auto val_fw = gather_nd_gpu::create;
implementation_map<gather_nd>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
implementation_map<gather_nd>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
implementation_map<gather_nd>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
implementation_map<gather_nd>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
implementation_map<gather_nd>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
implementation_map<gather_nd>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
implementation_map<gather_nd>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
implementation_map<gather_nd>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
implementation_map<gather_nd>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw);
}
} // namespace detail
} // namespace gpu
} // namespace cldnn

View File

@ -1,266 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <functional>
#include <initializer_list>
#include <stdexcept>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <utility>
namespace cldnn {
namespace gpu {
namespace mputils {
template <typename... Tys>
struct type_tuple;
template <std::size_t... Idxs>
struct index_tuple {};
// -----------------------------------------------------------------------------------------------------------------------
template <typename TypeTupleTy, typename ElemTy>
struct count_tt;
template <typename Ty, typename... Tys, typename ElemTy>
struct count_tt<type_tuple<Ty, Tys...>, ElemTy>
: std::integral_constant<std::size_t,
count_tt<type_tuple<Tys...>, ElemTy>::value +
static_cast<std::size_t>(std::is_same<Ty, ElemTy>::value)> {};
template <typename ElemTy>
struct count_tt<type_tuple<>, ElemTy> : std::integral_constant<std::size_t, 0> {};
// -----------------------------------------------------------------------------------------------------------------------
template <typename TypeTupleTy>
struct size_tt;
template <typename... Tys>
struct size_tt<type_tuple<Tys...>> : std::integral_constant<std::size_t, sizeof...(Tys)> {};
// -----------------------------------------------------------------------------------------------------------------------
template <typename TypeTupleTy, typename ElemTy>
struct split_tt;
namespace detail {
template <typename TypeTupleTy, typename ElemTy, typename FirstTupleTy>
struct split_tt_helper1;
template <typename Ty, typename... Tys, typename ElemTy, typename... FirstTys>
struct split_tt_helper1<type_tuple<Ty, Tys...>, ElemTy, type_tuple<FirstTys...>>
: split_tt_helper1<type_tuple<Tys...>, ElemTy, type_tuple<FirstTys..., Ty>> {};
template <typename Ty, typename... Tys, typename... FirstTys>
struct split_tt_helper1<type_tuple<Ty, Tys...>, Ty, type_tuple<FirstTys...>> {
using first_type = type_tuple<FirstTys...>;
using second_type = type_tuple<Tys...>;
};
template <typename ElemTy, typename... FirstTys>
struct split_tt_helper1<type_tuple<>, ElemTy, type_tuple<FirstTys...>> {
using first_type = type_tuple<>;
using second_type = type_tuple<FirstTys...>;
};
} // namespace detail
template <typename... Tys, typename ElemTy>
struct split_tt<type_tuple<Tys...>, ElemTy> : detail::split_tt_helper1<type_tuple<Tys...>, ElemTy, type_tuple<>> {};
// -----------------------------------------------------------------------------------------------------------------------
template <typename TypeTupleTy, typename ElemTy>
struct index_of_tt;
static constexpr std::size_t npos = static_cast<std::size_t>(-1);
namespace detail {
template <typename TypeTupleTy, typename ElemTy, std::size_t Pos>
struct index_of_tt_helper1;
template <typename Ty, typename... Tys, typename ElemTy, std::size_t Pos>
struct index_of_tt_helper1<type_tuple<Ty, Tys...>, ElemTy, Pos>
: index_of_tt_helper1<type_tuple<Tys...>, ElemTy, Pos + 1> {};
template <typename Ty, typename... Tys, std::size_t Pos>
struct index_of_tt_helper1<type_tuple<Ty, Tys...>, Ty, Pos> : std::integral_constant<std::size_t, Pos> {};
template <typename ElemTy, std::size_t Pos>
struct index_of_tt_helper1<type_tuple<>, ElemTy, Pos> : std::integral_constant<std::size_t, npos> {};
} // namespace detail
template <typename... Tys, typename ElemTy>
struct index_of_tt<type_tuple<Tys...>, ElemTy> : detail::index_of_tt_helper1<type_tuple<Tys...>, ElemTy, 0> {};
// -----------------------------------------------------------------------------------------------------------------------
template <typename TypeTupleTy, typename ElemTy>
struct remove_tt;
namespace detail {
template <typename TypeTupleTy, typename ElemTy, typename ResultTupleTy>
struct remove_tt_helper1;
template <typename Ty, typename... Tys, typename ElemTy, typename... ResultTys>
struct remove_tt_helper1<type_tuple<Ty, Tys...>, ElemTy, type_tuple<ResultTys...>>
: remove_tt_helper1<type_tuple<Tys...>, ElemTy, type_tuple<ResultTys..., Ty>> {};
template <typename Ty, typename... Tys, typename... ResultTys>
struct remove_tt_helper1<type_tuple<Ty, Tys...>, Ty, type_tuple<ResultTys...>>
: remove_tt_helper1<type_tuple<Tys...>, Ty, type_tuple<ResultTys...>> {};
template <typename ElemTy, typename... ResultTys>
struct remove_tt_helper1<type_tuple<>, ElemTy, type_tuple<ResultTys...>> {
using type = type_tuple<ResultTys...>;
};
} // namespace detail
template <typename... Tys, typename ElemTy>
struct remove_tt<type_tuple<Tys...>, ElemTy> : detail::remove_tt_helper1<type_tuple<Tys...>, ElemTy, type_tuple<>> {};
template <typename TypeTupleTy, typename ElemTy>
using remove_tt_t = typename remove_tt<TypeTupleTy, ElemTy>::type;
// -----------------------------------------------------------------------------------------------------------------------
template <template <typename...> class VariadicTTy, typename TypeTupleTy>
struct make_vttype_tt;
template <template <typename...> class VariadicTTy, typename... Tys>
struct make_vttype_tt<VariadicTTy, type_tuple<Tys...>> {
using type = VariadicTTy<Tys...>;
};
template <template <typename...> class VariadicTTy, typename TypeTupleTy>
using make_vttype_tt_t = typename make_vttype_tt<VariadicTTy, TypeTupleTy>::type;
// -----------------------------------------------------------------------------------------------------------------------
template <typename TypeTupleTy>
struct make_indexer_tt;
namespace detail {
template <typename TypeTupleTy, std::size_t Idx, typename IdxTupleTy>
struct make_indexer_tt_helper1;
template <typename Ty, typename... Tys, std::size_t Idx, std::size_t... Idxs>
struct make_indexer_tt_helper1<type_tuple<Ty, Tys...>, Idx, index_tuple<Idxs...>>
: make_indexer_tt_helper1<type_tuple<Tys...>, Idx + 1, index_tuple<Idxs..., Idx>> {};
template <std::size_t Idx, typename IdxTupleTy>
struct make_indexer_tt_helper1<type_tuple<>, Idx, IdxTupleTy> {
using type = IdxTupleTy;
};
} // namespace detail
template <typename... Tys>
struct make_indexer_tt<type_tuple<Tys...>> : detail::make_indexer_tt_helper1<type_tuple<Tys...>, 0, index_tuple<>> {};
template <typename TypeTupleTy>
using make_indexer_tt_t = typename make_indexer_tt<TypeTupleTy>::type;
// -----------------------------------------------------------------------------------------------------------------------
namespace detail {
template <template <typename> class DefaultValSelectorTTy,
std::size_t DefaultedStartPos,
std::size_t Idx,
typename ArgTy>
constexpr auto select_arg_or_default(ArgTy&& arg) -> typename std::decay<ArgTy>::type {
return (Idx < DefaultedStartPos) ? std::forward<ArgTy>(arg)
: DefaultValSelectorTTy<typename std::decay<ArgTy>::type>::value;
}
template <template <typename> class DefaultValSelectorTTy,
std::size_t DefaultedStartPos,
std::size_t... Idxs,
typename... ArgTys>
constexpr auto make_partially_defaulted_std_tuple(index_tuple<Idxs...>&&, ArgTys&&... args)
-> std::tuple<typename std::decay<ArgTys>::type...> {
return std::make_tuple(
select_arg_or_default<DefaultValSelectorTTy, DefaultedStartPos, Idxs>(std::forward<ArgTys>(args))...);
}
} // namespace detail
template <template <typename> class DefaultValSelectorTTy, std::size_t DefaultedStartPos, typename... ArgTys>
constexpr auto make_partially_defaulted_std_tuple(ArgTys&&... args)
-> std::tuple<typename std::decay<ArgTys>::type...> {
return detail::make_partially_defaulted_std_tuple<DefaultValSelectorTTy, DefaultedStartPos>(
make_indexer_tt_t<type_tuple<ArgTys...>>(),
std::forward<ArgTys>(args)...);
}
// -----------------------------------------------------------------------------------------------------------------------
} // namespace mputils
/// Marker type that separates required selectors from optional ones in kernel selector signature.
struct kd_optional_selector_t {};
template <typename Ty>
struct kd_default_value_selector {
static constexpr Ty value = static_cast<Ty>(0);
};
template <typename KernelDataTy, typename OuterTy, std::size_t ReqSelectorCount, typename SelectorsTupleTy>
class kd_selector;
template <typename KernelDataTy, typename OuterTy, std::size_t ReqSelectorCount, typename... SelectorTys>
class kd_selector<KernelDataTy, OuterTy, ReqSelectorCount, mputils::type_tuple<SelectorTys...>> {
using _selector_types = mputils::type_tuple<SelectorTys...>;
static_assert(mputils::count_tt<_selector_types, kd_optional_selector_t>::value == 0,
"Optional selectors separator can be specified only in template alias. "
"Please do not use this class directly - use kd_selector_t alias instead.");
static_assert(mputils::size_tt<_selector_types>::value > 0, "At least one selector type must be specified.");
static_assert(ReqSelectorCount <= mputils::size_tt<_selector_types>::value,
"Number of required selectors is invalid.");
public:
using key_type = mputils::make_vttype_tt_t<std::tuple, _selector_types>;
using hash_type = std::hash<key_type>;
using mapped_type = KernelDataTy (*)(const OuterTy&);
using map_type = std::unordered_map<key_type, mapped_type, hash_type>;
using value_type = typename map_type::value_type;
private:
map_type _kernel_map;
template <std::size_t Idx>
KernelDataTy _get_kernel(mputils::index_tuple<Idx>&&, const OuterTy& outer, const SelectorTys&... selectors) {
auto value = _kernel_map.find(
mputils::make_partially_defaulted_std_tuple<kd_default_value_selector, Idx - 1>(selectors...));
if (value == _kernel_map.end())
return _get_kernel(mputils::index_tuple<Idx - 1>(), outer, selectors...);
return value->second(outer);
}
static KernelDataTy _get_kernel(mputils::index_tuple<ReqSelectorCount>&&, const OuterTy&, const SelectorTys&...) {
throw std::runtime_error("ERROR: no default element in map for kernel data!!!");
}
public:
kd_selector(const std::initializer_list<value_type>& l) : _kernel_map(l) {}
KernelDataTy get_kernel(const OuterTy& outer, const SelectorTys&... selectors) {
return _get_kernel(mputils::index_tuple<sizeof...(SelectorTys) + 1>(), outer, selectors...);
}
};
template <typename KernelDataTy, typename OuterTy, typename... SelectorTys>
using kd_selector_t =
kd_selector<KernelDataTy,
OuterTy,
mputils::index_of_tt<mputils::type_tuple<SelectorTys...>, kd_optional_selector_t>::value !=
mputils::npos
? mputils::index_of_tt<mputils::type_tuple<SelectorTys...>, kd_optional_selector_t>::value
: sizeof...(SelectorTys),
mputils::remove_tt_t<mputils::type_tuple<SelectorTys...>, kd_optional_selector_t>>;
} // namespace gpu
} // namespace cldnn
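To make the dispatch behaviour above concrete: get_kernel tries the most specific key first and then keeps re-querying with trailing optional selectors replaced by kd_default_value_selector (zero) until an entry is found, throwing if even the fully defaulted key is missing. Below is a simplified standalone analogue with one required and one optional integer selector; kernel_data, outer_node and the selector values are hypothetical, and a plain std::map stands in for the hashed map:

#include <map>
#include <stdexcept>
#include <string>
#include <tuple>

struct outer_node {};                      // stands in for OuterTy (the program node)
using kernel_data = std::string;           // stands in for KernelDataTy
using create_fn = kernel_data (*)(const outer_node&);
using key = std::tuple<int, int>;          // (required selector, optional selector)

kernel_data generic_kernel(const outer_node&) { return "generic"; }
kernel_data batch8_kernel(const outer_node&) { return "batch8"; }

static const std::map<key, create_fn> selector_map = {
    { std::make_tuple(1, 0), generic_kernel },  // defaulted entry: optional selector == 0
    { std::make_tuple(1, 8), batch8_kernel },   // specialized entry for batch == 8
};

kernel_data get_kernel(const outer_node& n, int required, int optional) {
    auto it = selector_map.find(std::make_tuple(required, optional));
    if (it == selector_map.end())                               // fall back to the defaulted key,
        it = selector_map.find(std::make_tuple(required, 0));   // like kd_default_value_selector does
    if (it == selector_map.end())
        throw std::runtime_error("no default element in map for kernel data");
    return it->second(n);                                       // invoke the kernel-data factory
}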

View File

@ -1,84 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "lrn_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "lrn/lrn_kernel_selector.h"
#include "lrn/lrn_kernel_base.h"
namespace cldnn {
namespace gpu {
struct lrn_gpu : typed_primitive_gpu_impl<lrn> {
using parent = typed_primitive_gpu_impl<lrn>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<lrn_gpu>(*this);
}
static primitive_impl* create(const lrn_node& arg) {
auto lrn_params = get_default_params<kernel_selector::lrn_params>(arg);
auto lrn_optional_params = get_default_optional_params<kernel_selector::lrn_optional_params>(arg.get_program());
const auto& primitive = arg.get_primitive();
lrn_params.alpha = primitive->alpha;
lrn_params.beta = primitive->beta;
lrn_params.k = primitive->k;
lrn_params.localSize = primitive->size;
lrn_params.divMode = kernel_selector::kernel_divider_mode::FIXED;
lrn_params.normMode = primitive->norm_region == lrn_norm_region_within_channel
? kernel_selector::lrn_mode::WITHIN_CHANNEL
: kernel_selector::lrn_mode::ACROSS_CHANNEL;
auto& kernel_selector = kernel_selector::lrn_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(lrn_params, lrn_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto lrn = new lrn_gpu(arg, best_kernels[0]);
return lrn;
}
};
namespace detail {
attach_lrn_gpu::attach_lrn_gpu() {
implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), lrn_gpu::create);
implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), lrn_gpu::create);
implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), lrn_gpu::create);
implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), lrn_gpu::create);
implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), lrn_gpu::create);
implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), lrn_gpu::create);
implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), lrn_gpu::create);
implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), lrn_gpu::create);
implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), lrn_gpu::create);
implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), lrn_gpu::create);
implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), lrn_gpu::create);
implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), lrn_gpu::create);
implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), lrn_gpu::create);
implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv4), lrn_gpu::create);
implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), lrn_gpu::create);
implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), lrn_gpu::create);
implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), lrn_gpu::create);
implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), lrn_gpu::create);
implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), lrn_gpu::create);
implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), lrn_gpu::create);
}
} // namespace detail
} // namespace gpu
} // namespace cldnn

View File

@ -1,32 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "mutable_data_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
namespace cldnn {
namespace gpu {
struct mutable_data_gpu : public typed_primitive_gpu_impl<mutable_data> {
using parent = typed_primitive_gpu_impl<mutable_data>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<mutable_data_gpu>(*this);
}
public:
static primitive_impl* create(mutable_data_node const& arg) { return new mutable_data_gpu(arg, {}); }
};
namespace detail {
attach_mutable_data_gpu::attach_mutable_data_gpu() {
implementation_map<mutable_data>::add({{engine_types::ocl, mutable_data_gpu::create}});
}
} // namespace detail
} // namespace gpu
} // namespace cldnn

View File

@ -1,106 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "mvn_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "mvn/mvn_kernel_selector.h"
#include "mvn/mvn_kernel_base.h"
#include <algorithm>
using namespace cldnn;
namespace cldnn {
namespace gpu {
struct mvn_gpu : typed_primitive_gpu_impl<mvn> {
using parent = typed_primitive_gpu_impl<mvn>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<mvn_gpu>(*this);
}
public:
static primitive_impl* create(const mvn_node& arg) {
auto mvn_params = get_default_params<kernel_selector::mvn_params>(arg);
auto mvn_optional_params = get_default_optional_params<kernel_selector::mvn_optional_params>(arg.get_program());
mvn_params.mvnMode = arg.get_primitive()->across_channels ? kernel_selector::mvn_mode::ACROSS_CHANNELS
: kernel_selector::mvn_mode::WITHIN_CHANNELS;
mvn_params.mvnNormalizeVariance = arg.get_primitive()->normalize_variance;
mvn_params.epsilon = arg.get_primitive()->epsilon;
mvn_params.mvnEpsMode = arg.get_primitive()->eps_inside_sqrt ? kernel_selector::mvn_eps_mode::INSIDE_SQRT
: kernel_selector::mvn_eps_mode::OUTSIDE_SQRT;
auto& kernel_selector = kernel_selector::mvn_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(mvn_params, mvn_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto mvn = new mvn_gpu(arg, best_kernels[0]);
return mvn;
}
};
namespace detail {
attach_mvn_gpu::attach_mvn_gpu() {
implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
mvn_gpu::create);
implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
mvn_gpu::create);
implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx),
mvn_gpu::create);
implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
mvn_gpu::create);
implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
mvn_gpu::create);
implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
mvn_gpu::create);
implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
mvn_gpu::create);
implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
mvn_gpu::create);
implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx),
mvn_gpu::create);
implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx),
mvn_gpu::create);
implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx),
mvn_gpu::create);
implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx),
mvn_gpu::create);
implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16),
mvn_gpu::create);
implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16),
mvn_gpu::create);
implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16),
mvn_gpu::create);
implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16),
mvn_gpu::create);
implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
mvn_gpu::create);
implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
mvn_gpu::create);
implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16),
mvn_gpu::create);
implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16),
mvn_gpu::create);
implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16),
mvn_gpu::create);
implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16),
mvn_gpu::create);
}
} // namespace detail
} // namespace gpu
} // namespace cldnn

View File

@ -1,93 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "normalize_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "normalize/normalize_kernel_selector.h"
#include "normalize/normalize_kernel_base.h"
#include <algorithm>
using namespace cldnn;
namespace cldnn {
namespace gpu {
struct normalize_gpu : typed_primitive_gpu_impl<normalize> {
using parent = typed_primitive_gpu_impl<normalize>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<normalize_gpu>(*this);
}
protected:
kernel_arguments_data get_arguments(typed_primitive_inst<normalize>& instance, int32_t split) const override {
kernel_arguments_data args = parent::get_arguments(instance, split);
args.scale_table = instance.scale_memory();
return args;
}
public:
static primitive_impl* create(const normalize_node& arg) {
auto norm_params = get_default_params<kernel_selector::normalize_params>(arg);
auto norm_optional_params =
get_default_optional_params<kernel_selector::normalize_optional_params>(arg.get_program());
const auto& scale_layout = arg.scale().get_output_layout();
norm_params.normMode = arg.get_primitive()->across_spatial ? kernel_selector::normalize_mode::ACROSS_SPATIAL
: kernel_selector::normalize_mode::WITHIN_SPATIAL;
norm_params.epsilon = arg.get_primitive()->epsilon;
norm_params.scaleTable = convert_data_tensor(scale_layout).FlattenFeatureAndSpatials();
auto& kernel_selector = kernel_selector::normalize_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(norm_params, norm_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto lrn = new normalize_gpu(arg, best_kernels[0]);
return lrn;
}
};
namespace detail {
attach_normalize_gpu::attach_normalize_gpu() {
implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
normalize_gpu::create);
implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
normalize_gpu::create);
implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
normalize_gpu::create);
implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx),
normalize_gpu::create);
implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
normalize_gpu::create);
implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
normalize_gpu::create);
implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb),
normalize_gpu::create);
implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb),
normalize_gpu::create);
implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
normalize_gpu::create);
implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
normalize_gpu::create);
implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf),
normalize_gpu::create);
implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf),
normalize_gpu::create);
}
} // namespace detail
} // namespace gpu
} // namespace cldnn

View File

@ -1,74 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "one_hot_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "kernel_selector_helper.h"
#include "one_hot/one_hot_kernel_selector.h"
#include "one_hot/one_hot_kernel_base.h"
#include "cldnn/runtime/error_handler.hpp"
#include <vector>
namespace cldnn {
namespace gpu {
struct one_hot_gpu : typed_primitive_gpu_impl<one_hot> {
using parent = typed_primitive_gpu_impl<one_hot>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<one_hot_gpu>(*this);
}
static primitive_impl* create(const one_hot_node& arg) {
auto oh_params = get_default_params<kernel_selector::one_hot_params>(arg, 1);
auto oh_optional_params =
get_default_optional_params<kernel_selector::one_hot_optional_params>(arg.get_program());
oh_params.one_hot_axis = arg.get_primitive()->one_hot_axis;
oh_params.on_value = arg.get_primitive()->on_value;
oh_params.off_value = arg.get_primitive()->off_value;
auto output_sizes = arg.get_output_layout().format == format::bfzyx ?
arg.get_output_layout().size.sizes(format::bfzyx) :
arg.get_output_layout().size.sizes(format::bfyx);
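// one_hot_limit is the extent of the output tensor along the one-hot axis, i.e. the depth of the encoding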
oh_params.one_hot_limit = output_sizes[oh_params.one_hot_axis];
auto& kernel_selector = kernel_selector::one_hot_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(oh_params, oh_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with these arguments");
return new one_hot_gpu(arg, best_kernels[0]);
}
};
namespace detail {
attach_one_hot_gpu::attach_one_hot_gpu() {
auto val_fw = one_hot_gpu::create;
implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), val_fw);
implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), val_fw);
implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
}
} // namespace detail
} // namespace gpu
} // namespace cldnn

View File

@ -1,174 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "quantize_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "kernel_selector_helper.h"
#include "quantize/quantize_kernel_selector.h"
#include "quantize/quantize_kernel_ref.h"
#include "cldnn/runtime/error_handler.hpp"
using namespace cldnn;
namespace cldnn {
namespace gpu {
struct quantize_gpu : typed_primitive_gpu_impl<quantize> {
using parent = typed_primitive_gpu_impl<quantize>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<quantize_gpu>(*this);
}
protected:
kernel_arguments_data get_arguments(typed_primitive_inst<quantize>& instance, int32_t) const override {
kernel_arguments_data args;
for (size_t i = 0; i < instance.inputs_memory_count(); i++) {
args.inputs.push_back(instance.input_memory_ptr(i));
}
if (instance.node.get_scale_shift_opt()) {
if (instance.node.get_dependencies().size() == 9) {
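// dependencies 5..8 are assumed to hold the pre-computed per-tensor input/output scale and shift buffers used on the scale_shift_opt path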
args.inputs.push_back(instance.dep_memory_ptr(5));
args.inputs.push_back(instance.dep_memory_ptr(6));
args.inputs.push_back(instance.dep_memory_ptr(7));
args.inputs.push_back(instance.dep_memory_ptr(8));
}
}
args.output = instance.output_memory_ptr();
return args;
}
public:
static primitive_impl* create(const quantize_node& arg) {
auto quantize_params = get_default_params<kernel_selector::quantize_params>(arg);
auto quantize_optional_params =
get_default_optional_params<kernel_selector::quantize_optional_params>(arg.get_program());
quantize_params.levels = arg.get_levels();
quantize_params.packed_binary_output = arg.get_packed_binary_output();
quantize_params.scale_shift_opt = arg.get_scale_shift_opt();
quantize_params.has_post_scale = arg.get_need_post_scale();
quantize_params.has_post_shift = arg.get_need_post_shift();
quantize_params.has_pre_shift = arg.get_need_pre_shift();
quantize_params.has_clamp = arg.get_need_clamp();
quantize_params.per_tensor_input_range = arg.get_per_tensor_input_range();
quantize_params.per_tensor_input_scale = arg.get_per_tensor_input_scale();
quantize_params.per_tensor_input_shift = arg.get_per_tensor_input_shift();
quantize_params.per_tensor_output_scale = arg.get_per_tensor_output_scale();
quantize_params.per_tensor_output_shift = arg.get_per_tensor_output_shift();
quantize_params.in_lo = arg.get_input_lo_val();
quantize_params.in_hi = arg.get_input_hi_val();
quantize_params.in_scale = arg.get_input_scale_val();
quantize_params.in_shift = arg.get_input_shift_val();
quantize_params.out_scale = arg.get_output_scale_val();
quantize_params.out_shift = arg.get_output_shift_val();
for (size_t i = 1; i < arg.inputs_count(); i++) {
quantize_params.inputs.push_back(convert_data_tensor(arg.input(i).get_output_layout()));
}
const auto& output_layout = arg.get_output_layout();
quantize_params.output = convert_data_tensor(output_layout);
auto& kernel_selector = kernel_selector::quantize_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(quantize_params, quantize_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto quantize = new quantize_gpu(arg, best_kernels[0]);
return quantize;
}
};
namespace detail {
attach_quantize_gpu::attach_quantize_gpu() {
auto val_fw = quantize_gpu::create;
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::fs_b_yx_fsv32), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::fs_b_yx_fsv32), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::fs_b_yx_fsv32), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv4), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv32), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv32), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv32), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv32), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv32), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv32), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv32), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv32), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_yx_bsv16_fsv16), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_zyx_bsv16_fsv16), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_yx_bsv16_fsv16), val_fw);
}
} // namespace detail
} // namespace gpu
} // namespace cldnn

View File

@ -1,86 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "register_gpu.hpp"
namespace cldnn { namespace gpu {
#define REGISTER_GPU(prim) \
static detail::attach_##prim##_gpu attach_##prim
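// e.g. REGISTER_GPU(eltwise); expands to: static detail::attach_eltwise_gpu attach_eltwise;
// each attach_* constructor (defined in the per-primitive files above) runs during static initialization and fills the implementation_map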
void register_implementations_gpu() {
REGISTER_GPU(activation);
REGISTER_GPU(arg_max_min);
REGISTER_GPU(average_unpooling);
REGISTER_GPU(binary_convolution);
REGISTER_GPU(border);
REGISTER_GPU(broadcast);
REGISTER_GPU(concatenation);
REGISTER_GPU(condition);
REGISTER_GPU(convolution);
REGISTER_GPU(crop);
REGISTER_GPU(custom_gpu_primitive);
REGISTER_GPU(data);
REGISTER_GPU(deconvolution);
REGISTER_GPU(deformable_conv);
REGISTER_GPU(deformable_interp);
REGISTER_GPU(depth_to_space);
REGISTER_GPU(batch_to_space);
REGISTER_GPU(detection_output);
REGISTER_GPU(eltwise);
REGISTER_GPU(fully_connected);
REGISTER_GPU(gather);
REGISTER_GPU(gather_nd);
REGISTER_GPU(gemm);
REGISTER_GPU(input_layout);
REGISTER_GPU(lrn);
REGISTER_GPU(lstm_gemm);
REGISTER_GPU(lstm_elt);
REGISTER_GPU(max_unpooling);
REGISTER_GPU(mutable_data);
REGISTER_GPU(mvn);
REGISTER_GPU(normalize);
REGISTER_GPU(one_hot);
REGISTER_GPU(permute);
REGISTER_GPU(pooling);
REGISTER_GPU(prior_box);
REGISTER_GPU(proposal);
REGISTER_GPU(pyramid_roi_align);
REGISTER_GPU(quantize);
REGISTER_GPU(reduce);
REGISTER_GPU(region_yolo);
REGISTER_GPU(reorder);
REGISTER_GPU(reorg_yolo);
REGISTER_GPU(reshape);
REGISTER_GPU(reverse_sequence);
REGISTER_GPU(roi_pooling);
REGISTER_GPU(scale);
REGISTER_GPU(scatter_update);
REGISTER_GPU(scatter_nd_update);
REGISTER_GPU(scatter_elements_update);
REGISTER_GPU(select);
REGISTER_GPU(shuffle_channels);
REGISTER_GPU(softmax);
REGISTER_GPU(space_to_batch);
REGISTER_GPU(space_to_depth);
REGISTER_GPU(strided_slice);
REGISTER_GPU(tile);
REGISTER_GPU(fused_conv_eltwise);
REGISTER_GPU(lstm_dynamic_input);
REGISTER_GPU(lstm_dynamic_timeloop);
REGISTER_GPU(generic_layer);
REGISTER_GPU(gather_tree);
REGISTER_GPU(resample);
REGISTER_GPU(non_max_suppression);
REGISTER_GPU(grn);
REGISTER_GPU(ctc_greedy_decoder);
REGISTER_GPU(cum_sum);
REGISTER_GPU(embedding_bag);
REGISTER_GPU(extract_image_patches);
REGISTER_GPU(loop);
}
} // namespace gpu
} // namespace cldnn

View File

@ -1,144 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "scale_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "kernel_selector_helper.h"
#include "eltwise/eltwise_kernel_selector.h"
#include "eltwise/eltwise_kernel_base.h"
#include "cldnn/runtime/error_handler.hpp"
using namespace cldnn;
namespace cldnn {
namespace gpu {
struct scale_gpu : typed_primitive_gpu_impl<scale> {
using parent = typed_primitive_gpu_impl<scale>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<scale_gpu>(*this);
}
protected:
kernel_arguments_data get_arguments(typed_primitive_inst<scale>& instance, int32_t split) const override {
kernel_arguments_data args = parent::get_arguments(instance, split);
args.inputs = {instance.input_memory_ptr(), instance.scale_memory()};
args.output = instance.output_memory_ptr();
if (_outer.bias_term()) {
args.inputs.push_back(instance.bias_memory());
}
return args;
}
public:
static primitive_impl* create(const scale_node& arg) {
auto ew_params = get_default_params<kernel_selector::eltwise_params>(arg);
auto ew_optional_params =
get_default_optional_params<kernel_selector::eltwise_optional_params>(arg.get_program());
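// scale is implemented on top of the eltwise kernel: output = input * scale, with an extra ADD against the bias buffer when bias_term() is set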
ew_params.inputs.push_back(convert_data_tensor(arg.scale_in().get_output_layout()));
ew_params.operations.push_back({{kernel_selector::eltwise_params::InputType::Buffer(0),
kernel_selector::eltwise_params::InputType::Buffer(1)},
kernel_selector::eltwise_mode::MUL});
if (arg.bias_term()) {
ew_params.inputs.push_back(convert_data_tensor(arg.bias().get_output_layout()));
ew_params.operations.push_back({{kernel_selector::eltwise_params::InputType::Intermediate(0),
kernel_selector::eltwise_params::InputType::Buffer(2)},
kernel_selector::eltwise_mode::ADD});
}
ew_params.layoutBased = true;
auto& kernel_selector = kernel_selector::eltwise_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(ew_params, ew_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto scale = new scale_gpu(arg, best_kernels[0]);
return scale;
}
};
namespace detail {
attach_scale_gpu::attach_scale_gpu() {
auto val_fw = scale_gpu::create;
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_yx_fsv16), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_zyx_fsv16), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bs_fs_zyx_bsv16_fsv16), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::fs_b_yx_fsv32), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bs_fs_yx_bsv16_fsv16), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv4), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_yx_fsv4), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv32), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv32), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv32), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv32), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_yx_fsv32), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv32), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv32), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv32), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv32), val_fw);
implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_zyx_fsv32), val_fw);
}
} // namespace detail
} // namespace gpu
} // namespace cldnn

View File

@ -1,70 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "select_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "select/select_kernel_selector.h"
#include "select/select_kernel_base.h"
namespace cldnn {
namespace gpu {
struct select_gpu : typed_primitive_gpu_impl<select> {
using parent = typed_primitive_gpu_impl<select>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<select_gpu>(*this);
}
public:
static primitive_impl* create(const select_node& arg) {
auto select_params = get_default_params<kernel_selector::select_params>(arg);
auto select_optional_params =
get_default_optional_params<kernel_selector::select_optional_params>(arg.get_program());
for (size_t i = 1; i < arg.inputs_count(); i++) {
select_params.inputs.push_back(convert_data_tensor(arg.input(i).get_output_layout()));
}
auto& kernel_selector = kernel_selector::select_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(select_params, select_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto select = new select_gpu(arg, best_kernels[0]);
return select;
}
};
namespace detail {
attach_select_gpu::attach_select_gpu() {
implementation_map<select>::add(
{{std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), select_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), select_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), select_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), select_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), select_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), select_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), select_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), select_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), select_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), select_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), select_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), select_gpu::create}});
}
} // namespace detail
} // namespace gpu
} // namespace cldnn

View File

@ -1,85 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "shuffle_channels_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "kernel_selector_helper.h"
#include "shuffle_channels/shuffle_channels_kernel_selector.h"
#include "shuffle_channels/shuffle_channels_kernel_ref.h"
#include "cldnn/runtime/error_handler.hpp"
using namespace cldnn;
namespace cldnn {
namespace gpu {
struct shuffle_channels_gpu : typed_primitive_gpu_impl<shuffle_channels> {
using parent = typed_primitive_gpu_impl<shuffle_channels>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<shuffle_channels_gpu>(*this);
}
public:
static primitive_impl* create(const shuffle_channels_node& arg) {
auto shuffle_channels_params = get_default_params<kernel_selector::shuffle_channels_params>(arg);
auto shuffle_channels_optional_params =
get_default_optional_params<kernel_selector::shuffle_channels_optional_params>(arg.get_program());
const int32_t number_of_dims = 4;
int32_t axis = arg.get_primitive()->axis;
if (axis < 0)
axis += number_of_dims;
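// e.g. with the fixed 4D rank, axis == -3 normalizes to axis == 1 (the feature axis of bfyx)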
shuffle_channels_params.group = arg.get_primitive()->group;
shuffle_channels_params.axis = axis;
auto& kernel_selector = kernel_selector::shuffle_channels_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(shuffle_channels_params, shuffle_channels_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto shuffle_channels = new shuffle_channels_gpu(arg, best_kernels[0]);
return shuffle_channels;
}
};
namespace detail {
attach_shuffle_channels_gpu::attach_shuffle_channels_gpu() {
auto val_fw = shuffle_channels_gpu::create;
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv4), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_yx_bsv16_fsv16), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::fs_b_yx_fsv32), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), val_fw);
}
} // namespace detail
} // namespace gpu
} // namespace cldnn

View File

@ -1,77 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "space_to_batch_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "kernel_selector_helper.h"
#include "space_to_batch/space_to_batch_kernel_selector.h"
#include "space_to_batch/space_to_batch_kernel_ref.h"
#include "cldnn/runtime/error_handler.hpp"
#include "data_inst.h"
#include <vector>
using namespace cldnn;
namespace cldnn {
namespace gpu {
struct space_to_batch_gpu : typed_primitive_gpu_impl<space_to_batch> {
using parent = typed_primitive_gpu_impl<space_to_batch>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<space_to_batch_gpu>(*this);
}
public:
static primitive_impl* create(const space_to_batch_node& arg) {
auto space_to_batch_params = get_default_params<kernel_selector::space_to_batch_params>(arg);
auto space_to_batch_optional_params =
get_default_optional_params<kernel_selector::space_to_batch_optional_params>(arg.get_program());
auto primitive = arg.get_primitive();
space_to_batch_params.block_shape = convert_dim_vector(primitive->block_shape);
space_to_batch_params.pads_begin = convert_dim_vector(primitive->pads_begin);
space_to_batch_params.pads_end = convert_dim_vector(primitive->pads_end);
auto& kernel_selector = kernel_selector::space_to_batch_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(space_to_batch_params, space_to_batch_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto space_to_batch = new space_to_batch_gpu(arg, best_kernels[0]);
return space_to_batch;
}
};
namespace detail {
attach_space_to_batch_gpu::attach_space_to_batch_gpu() {
auto val_fw = space_to_batch_gpu::create;
implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw);
implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw);
implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), val_fw);
implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), val_fw);
implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), val_fw);
implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), val_fw);
}
} // namespace detail
} // namespace gpu
} // namespace cldnn

View File

@ -1,79 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "space_to_depth_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "kernel_selector_helper.h"
#include "space_to_depth/space_to_depth_kernel_selector.h"
#include "space_to_depth/space_to_depth_kernel_ref.h"
#include "cldnn/runtime/error_handler.hpp"
using namespace cldnn;
namespace cldnn {
namespace gpu {
struct space_to_depth_gpu : typed_primitive_gpu_impl<space_to_depth> {
using parent = typed_primitive_gpu_impl<space_to_depth>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<space_to_depth_gpu>(*this);
}
public:
static primitive_impl* create(const space_to_depth_node& arg) {
auto space_to_depth_params = get_default_params<kernel_selector::space_to_depth_params>(arg);
auto space_to_depth_optional_params =
get_default_optional_params<kernel_selector::space_to_depth_optional_params>(arg.get_program());
space_to_depth_params.depth_mode = (arg.get_primitive()->mode == space_to_depth::blocks_first) ?
kernel_selector::SpaceToDepthMode::BLOCKS_FIRST :
kernel_selector::SpaceToDepthMode::DEPTH_FIRST;
space_to_depth_params.block_size = arg.get_primitive()->block_size;
auto& kernel_selector = kernel_selector::space_to_depth_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(space_to_depth_params, space_to_depth_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto space_to_depth = new space_to_depth_gpu(arg, best_kernels[0]);
return space_to_depth;
}
};
namespace detail {
attach_space_to_depth_gpu::attach_space_to_depth_gpu() {
auto val_fw = space_to_depth_gpu::create;
implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);
implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), val_fw);
implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv4), val_fw);
implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), val_fw);
implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), val_fw);
}
} // namespace detail
} // namespace gpu
} // namespace cldnn

View File

@ -1,72 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "tile_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "kernel_selector_helper.h"
#include "tile/tile_kernel_selector.h"
#include "tile/tile_kernel_ref.h"
#include "cldnn/runtime/error_handler.hpp"
using namespace cldnn;
namespace cldnn {
namespace gpu {
struct tile_gpu : typed_primitive_gpu_impl<tile> {
using parent = typed_primitive_gpu_impl<tile>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<tile_gpu>(*this);
}
public:
static primitive_impl* create(const tile_node& arg) {
auto tile_params = get_default_params<kernel_selector::tile_params>(arg);
auto tile_optional_params =
get_default_optional_params<kernel_selector::tile_optional_params>(arg.get_program());
auto& kernel_selector = kernel_selector::tile_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(tile_params, tile_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto tile = new tile_gpu(arg, best_kernels[0]);
return tile;
}
};
namespace detail {
attach_tile_gpu::attach_tile_gpu() {
auto val_fw = tile_gpu::create;
implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw);
implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw);
implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw);
implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
}
} // namespace detail
} // namespace gpu
} // namespace cldnn

View File

@ -51,7 +51,7 @@ void add_required_reorders::run(program_impl& p) {
continue; // only nodes with dependencies
if (usr->is_type<data>())
continue;
if (usr->type()->does_an_implementation_exist(p.get_engine(), *usr))
if (usr->type()->does_an_implementation_exist(*usr))
continue;
bool correct_layout_selected = false;
@ -71,7 +71,7 @@ void add_required_reorders::run(program_impl& p) {
node->get_output_layout().format,
original_layout.size);
usr->set_output_layout(current_layout, false);
if (usr->type()->does_possible_implementation_exist(p.get_engine(), *usr)) {
if (usr->type()->does_possible_implementation_exist(*usr)) {
correct_layout_selected = true;
break;
} else if (original_layout.data_type == data_types::i64) {
@ -80,14 +80,14 @@ void add_required_reorders::run(program_impl& p) {
current_layout = original_layout;
current_layout.data_type = data_types::i32;
usr->set_output_layout(current_layout, false);
if (usr->type()->does_possible_implementation_exist(p.get_engine(), *usr)) {
if (usr->type()->does_possible_implementation_exist(*usr)) {
correct_layout_selected = true;
} else {
current_layout = original_layout;
current_layout.data_type = data_types::i32;
current_layout.format = node->get_output_layout().format;
usr->set_output_layout(current_layout, false);
if (usr->type()->does_possible_implementation_exist(p.get_engine(), *usr)) {
if (usr->type()->does_possible_implementation_exist(*usr)) {
correct_layout_selected = true;
}
}
@ -148,7 +148,7 @@ void add_required_reorders::run(program_impl& p) {
new_layout_format,
original_layout.size);
usr->set_output_layout(current_layout, false);
if (usr->type()->does_possible_implementation_exist(p.get_engine(), *usr)) {
if (usr->type()->does_possible_implementation_exist(*usr)) {
correct_layout_selected = true;
break;
}
@ -164,7 +164,7 @@ void add_required_reorders::run(program_impl& p) {
usr->set_output_layout(original_layout_i32, false);
if (usr->type()->does_possible_implementation_exist(p.get_engine(), *usr)) {
if (usr->type()->does_possible_implementation_exist(*usr)) {
correct_layout_selected = true;
}
@ -174,7 +174,7 @@ void add_required_reorders::run(program_impl& p) {
new_layout_format,
original_layout_i32.size);
usr->set_output_layout(current_layout_i32, false);
if (usr->type()->does_possible_implementation_exist(p.get_engine(), *usr)) {
if (usr->type()->does_possible_implementation_exist(*usr)) {
correct_layout_selected = true;
break;
}

View File

@ -42,7 +42,7 @@ void compile_graph::run(program_impl& p) {
auto& node = *(std::next(proc_order.begin(), i));
node->set_unique_id(std::to_string(i));
if (!node->is_type<data>() && !(node->is_type<mutable_data>() && node->get_dependencies().empty())) {
node->selected_impl = node->type()->choose_impl(p.get_engine(), *node);
node->selected_impl = node->type()->choose_impl(*node);
}
}
});
@ -51,7 +51,7 @@ void compile_graph::run(program_impl& p) {
#else
for (auto& node : p.get_processing_order()) {
if (!node->is_type<data>() && !(node->is_type<mutable_data>() && node->get_dependencies().empty())) {
node->selected_impl = node->type()->choose_impl(p.get_engine(), *node);
node->selected_impl = node->type()->choose_impl(*node);
}
}
#endif

View File

@ -5,7 +5,7 @@
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "pass_manager.h"
#include "gpu/primitive_gpu_base.h"
#include "impls/ocl/primitive_base.hpp"
#include "fully_connected/fully_connected_params.h"
#include <memory>
#include <stdexcept>
@ -45,7 +45,7 @@ void post_input_reorder::run(program_impl& p) {
const auto impl = node->get_selected_impl();
// add a reorder if primitive's input format doesn't match implementation's input format
if (node->is_type<fully_connected>()) {
const auto& fc_impl = dynamic_cast<const gpu::typed_primitive_gpu_impl<fully_connected>&>(*impl);
const auto& fc_impl = dynamic_cast<const ocl::typed_primitive_impl_ocl<fully_connected>&>(*impl);
const auto& fc_params = *static_cast<kernel_selector::fully_connected_params*>(fc_impl._kernel_data.params.get());
auto layout_format = from_data_layout(fc_params.inputs[0].GetLayout());
@ -62,7 +62,7 @@ void post_input_reorder::run(program_impl& p) {
reorder.set_unique_id(node->get_unique_id() + "_input_reorder");
reorder.get_output_layout(false);
node->set_output_layout(previous_layout, false);
reorder.set_selected_impl(reorder.type()->choose_impl(p.get_engine(), reorder));
reorder.set_selected_impl(reorder.type()->choose_impl(reorder));
}
}
}

View File

@ -56,7 +56,7 @@ void post_optimize_weights::optimize_weights(T& node, program_impl& p) {
// Don't run impl selection to avoid double compilation of reorder kernels
// in main program and internal program for constant propagation
if (!g_node.is_constant())
g_node.selected_impl = g_node.type()->choose_impl(p.get_engine(), g_node);
g_node.selected_impl = g_node.type()->choose_impl(g_node);
}
}

View File

@ -27,9 +27,8 @@ void remove_redundant_reorders::run(program_impl& p) {
if (!update_implementations)
return;
auto& eng = p.get_engine();
node.set_unique_id(node.get_unique_id() + "_reorder");
auto new_impl = node.type()->choose_impl(eng, node);
auto new_impl = node.type()->choose_impl(node);
node.set_selected_impl(std::move(new_impl));
};
@ -300,7 +299,7 @@ void remove_redundant_reorders::run(program_impl& p) {
continue;
input.set_output_layout(output_layout, false);
if (input.type()->does_possible_implementation_exist(p.get_engine(), input)) {
if (input.type()->does_possible_implementation_exist(input)) {
p.replace_all_usages(node, input);
p.add_optimized_primitive_info(node.id());
p.remove_all_connections(node);

View File

@ -58,7 +58,10 @@ std::map<program_node*, format::type> get_preferred_formats(program_impl& p, lay
continue;
auto ex = lo.get_preferred_format(*n);
auto impl = lo.get_preferred_impl_type(*n);
fmt_map[n] = ex;
n->set_preferred_impl_type(impl);
}
return fmt_map;
}

View File

@ -4,23 +4,23 @@
#include "condition_inst.h"
#include "network_impl.h"
#include "implementation_map.h"
#include "register_gpu.hpp"
#include "impls/implementation_map.hpp"
#include "register.hpp"
#include <algorithm>
#include <vector>
namespace cldnn {
namespace gpu {
namespace common {
struct condition_gpu : typed_primitive_impl<condition> {
struct condition_impl : typed_primitive_impl<condition> {
const condition_node& outer;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<condition_gpu>(*this);
return make_unique<condition_impl>(*this);
}
explicit condition_gpu(const condition_node& outer) : outer(outer) {}
explicit condition_impl(const condition_node& outer) : outer(outer) {}
event::ptr execute_impl(const std::vector<event::ptr>& events, condition_inst& instance) override {
for (auto& a : events) {
@ -42,7 +42,7 @@ struct condition_gpu : typed_primitive_impl<condition> {
return ev;
}
static primitive_impl* create(const condition_node& arg) { return new condition_gpu(arg); }
static primitive_impl* create(const condition_node& arg) { return new condition_impl(arg); }
void init_kernels() override {}
@ -117,13 +117,13 @@ private:
namespace detail {
attach_condition_gpu::attach_condition_gpu() {
implementation_map<condition>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
condition_gpu::create);
implementation_map<condition>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
condition_gpu::create);
attach_condition_common::attach_condition_common() {
implementation_map<condition>::add(impl_types::common, condition_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f32, format::yxfb),
});
}
} // namespace detail
} // namespace gpu
} // namespace common
} // namespace cldnn
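The registration above illustrates the pattern repeated throughout this commit: instead of one add() call per (engine, data type, format) tuple, an implementation now registers once under an impl_types bucket together with the set of (data type, format) keys it supports. A minimal sketch of the migration for a hypothetical primitive my_prim with factory my_impl::create (these names are illustrative and not part of this commit):

// old style: the engine type was baked into the key, one call per tuple
// implementation_map<my_prim>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), my_impl::create);

// new style: the implementation type is the bucket, (data type, format) pairs form the key set
implementation_map<my_prim>::add(impl_types::common, my_impl::create, {
    std::make_tuple(data_types::f32, format::bfyx),
    std::make_tuple(data_types::f32, format::yxfb),
});

An empty key set, as used for loop, data and input_layout below, means the implementation accepts any data type and format.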

View File

@ -5,25 +5,25 @@
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "loop_inst.h"
#include "network_impl.h"
#include "implementation_map.h"
#include "register_gpu.hpp"
#include "impls/implementation_map.hpp"
#include "register.hpp"
#include "mutable_data_inst.h"
#include "input_layout_inst.h"
#include <vector>
#include <algorithm>
namespace cldnn {
namespace gpu {
struct loop_gpu : typed_primitive_impl<loop> {
namespace common {
struct loop_impl : typed_primitive_impl<loop> {
const loop_node& node;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<loop_gpu>(*this);
return make_unique<loop_impl>(*this);
}
void init_kernels() override {}
loop_gpu(const loop_gpu& other) : typed_primitive_impl<loop>(other), node(other.node) {}
explicit loop_gpu(const loop_node& node) : node(node) {}
loop_impl(const loop_impl& other) : typed_primitive_impl<loop>(other), node(other.node) {}
explicit loop_impl(const loop_node& node) : node(node) {}
// read scalar value from data primitive
static int64_t read_scalar_value(memory::ptr mem, stream& stream) {
@ -216,14 +216,14 @@ struct loop_gpu : typed_primitive_impl<loop> {
return ev;
}
static primitive_impl* create(const loop_node& arg) { return new loop_gpu(arg); }
static primitive_impl* create(const loop_node& arg) { return new loop_impl(arg); }
};
namespace detail {
attach_loop_gpu::attach_loop_gpu() {
implementation_map<loop>::add({{engine_types::ocl, loop_gpu::create}});
attach_loop_common::attach_loop_common() {
implementation_map<loop>::add(impl_types::common, loop_impl::create, {});
}
} // namespace detail
} // namespace gpu
} // namespace common
} // namespace cldnn

View File

@ -0,0 +1,22 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "register.hpp"
namespace cldnn {
namespace common {
#define REGISTER_COMMON(prim) \
static detail::attach_##prim##_common attach_##prim
void register_implementations() {
REGISTER_COMMON(condition);
REGISTER_COMMON(data);
REGISTER_COMMON(input_layout);
REGISTER_COMMON(loop);
REGISTER_COMMON(prior_box);
}
} // namespace common
} // namespace cldnn

View File

@ -0,0 +1,35 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "cldnn/primitives/condition.hpp"
#include "cldnn/primitives/loop.hpp"
#include "cldnn/primitives/data.hpp"
#include "cldnn/primitives/input_layout.hpp"
#include "cldnn/primitives/prior_box.hpp"
namespace cldnn {
namespace common {
void register_implementations();
namespace detail {
#define REGISTER_COMMON(prim) \
struct attach_##prim##_common { \
attach_##prim##_common(); \
}
REGISTER_COMMON(condition);
REGISTER_COMMON(data);
REGISTER_COMMON(input_layout);
REGISTER_COMMON(loop);
REGISTER_COMMON(prior_box);
#undef REGISTER_COMMON
} // namespace detail
} // namespace common
} // namespace cldnn
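Together, register.cpp and register.hpp above implement lazy self-registration: the header declares one attach_*_common helper struct per primitive, and the REGISTER_COMMON macro in register.cpp creates a function-local static instance of it inside register_implementations(), so each helper's constructor (defined next to the corresponding implementation, e.g. attach_condition_common::attach_condition_common() above) runs exactly once, on the first call to register_implementations(), and fills the implementation_map. For the condition primitive the two macro invocations expand roughly as follows (expansion shown for illustration only):

// register.hpp: declaration of the helper whose constructor performs the add() calls
struct attach_condition_common {
    attach_condition_common();  // constructor defined next to condition_impl; calls implementation_map<condition>::add(...)
};

// register.cpp: inside void register_implementations()
void register_implementations() {
    static detail::attach_condition_common attach_condition;  // constructed on first call
    // ... same for data, input_layout, loop, prior_box
}

The CPU implementations follow the same scheme with REGISTER_CPU and their own register_implementations().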

View File

@ -6,21 +6,20 @@
#include "data_inst.h"
#include "prior_box_inst.h"
#include "input_layout_inst.h"
#include "implementation_map.h"
#include "register_gpu.hpp"
#include "impls/implementation_map.hpp"
#include "register.hpp"
#include "network_impl.h"
#include <vector>
namespace cldnn {
namespace gpu {
namespace common {
class wait_for_events_gpu : public primitive_impl {
class wait_for_events_impl : public primitive_impl {
public:
explicit wait_for_events_gpu(const program_node& /*node*/) {}
explicit wait_for_events_impl(const program_node& /*node*/) {}
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<wait_for_events_gpu>(*this);
return make_unique<wait_for_events_impl>(*this);
}
void init_kernels() override {}
@ -33,32 +32,32 @@ public:
bool validate(const primitive_inst&) const override { return true; }
static primitive_impl* create_data(const data_node& data) { return new wait_for_events_gpu(data); }
static primitive_impl* create_data(const data_node& data) { return new wait_for_events_impl(data); }
static primitive_impl* create_input_layout(const input_layout_node& input) {
return new wait_for_events_gpu(input);
return new wait_for_events_impl(input);
}
static primitive_impl* create_prior_box(const prior_box_node& prior_box) {
// This primitive is being executed on CPU during network compilation.
return new wait_for_events_gpu(prior_box);
return new wait_for_events_impl(prior_box);
}
};
namespace detail {
attach_data_gpu::attach_data_gpu() {
implementation_map<data>::add({ {engine_types::ocl, wait_for_events_gpu::create_data} });
attach_data_common::attach_data_common() {
implementation_map<data>::add(impl_types::common, wait_for_events_impl::create_data, {});
}
attach_input_layout_gpu::attach_input_layout_gpu() {
implementation_map<input_layout>::add({{engine_types::ocl, wait_for_events_gpu::create_input_layout}});
attach_input_layout_common::attach_input_layout_common() {
implementation_map<input_layout>::add(impl_types::common, wait_for_events_impl::create_input_layout, {});
}
attach_prior_box_gpu::attach_prior_box_gpu() {
implementation_map<prior_box>::add({{engine_types::ocl, wait_for_events_gpu::create_prior_box}});
attach_prior_box_common::attach_prior_box_common() {
implementation_map<prior_box>::add(impl_types::common, wait_for_events_impl::create_prior_box, {});
}
} // namespace detail
} // namespace gpu
} // namespace common
} // namespace cldnn

View File

@ -4,9 +4,9 @@
#include "detection_output_inst.h"
#include "network_impl.h"
#include "implementation_map.h"
#include "impls/implementation_map.hpp"
#include "math_utils.h"
#include "register_gpu.hpp"
#include "register.hpp"
#include "cpu_impl_helpers.hpp"
#include <algorithm>
@ -24,7 +24,7 @@
#endif
namespace cldnn {
namespace gpu {
namespace cpu {
namespace {
using bounding_box = cldnn::cpu::bounding_box;
@ -43,15 +43,15 @@ bool comp_score_descend<std::pair<int, int>>(const std::pair<float, std::pair<in
}
/************************ Detection Output CPU ************************/
struct detection_output_cpu : typed_primitive_impl<detection_output> {
struct detection_output_impl : typed_primitive_impl<detection_output> {
enum NMSType {CAFFE, MXNET};
const detection_output_node& outer;
NMSType nms_type;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<detection_output_cpu>(*this);
return make_unique<detection_output_impl>(*this);
}
explicit detection_output_cpu(const detection_output_node& outer)
explicit detection_output_impl(const detection_output_node& outer)
: outer(outer)
, nms_type(outer.get_primitive()->decrease_label_id ? MXNET : CAFFE) {}
@ -822,17 +822,19 @@ struct detection_output_cpu : typed_primitive_impl<detection_output> {
void init_kernels() override {}
static primitive_impl* create(const detection_output_node& arg) { return new detection_output_cpu(arg); }
static primitive_impl* create(const detection_output_node& arg) { return new detection_output_impl(arg); }
};
namespace detail {
attach_detection_output_gpu::attach_detection_output_gpu() {
implementation_map<detection_output>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), detection_output_cpu::create);
implementation_map<detection_output>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), detection_output_cpu::create);
attach_detection_output_impl::attach_detection_output_impl() {
implementation_map<detection_output>::add(impl_types::cpu, detection_output_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx)
});
}
} // namespace detail
} // namespace gpu
} // namespace cpu
} // namespace cldnn

View File

@ -5,7 +5,7 @@
#include "non_max_suppression_inst.h"
#include "primitive_inst.h"
#include "network_impl.h"
#include "register_gpu.hpp"
#include "register.hpp"
#include "cpu_impl_helpers.hpp"
#include <vector>
@ -15,7 +15,7 @@
#include <tuple>
namespace cldnn {
namespace {
namespace cpu {
using namespace cldnn::cpu;
@ -372,14 +372,14 @@ void run(non_max_suppression_inst& instance) {
store_result(stream, instance.output_memory_ptr(), result);
}
struct non_max_suppression_cpu : typed_primitive_impl<non_max_suppression> {
struct non_max_suppression_impl : typed_primitive_impl<non_max_suppression> {
using parent = typed_primitive_impl<non_max_suppression>;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<non_max_suppression_cpu>(*this);
return make_unique<non_max_suppression_impl>(*this);
}
non_max_suppression_cpu() : parent(kernel_selector::weights_reorder_params(), "non_max_suppression_cpu") {}
non_max_suppression_impl() : parent(kernel_selector::weights_reorder_params(), "non_max_suppression_impl") {}
virtual event::ptr execute_impl(const std::vector<event::ptr>& event, typed_primitive_inst<non_max_suppression>& instance) {
for (auto e : event) {
@ -396,23 +396,20 @@ struct non_max_suppression_cpu : typed_primitive_impl<non_max_suppression> {
}
static primitive_impl* create(const non_max_suppression_node&) {
return new non_max_suppression_cpu();
return new non_max_suppression_impl();
}
void init_kernels() override {}
};
} // namespace
namespace gpu {
namespace detail {
attach_non_max_suppression_gpu::attach_non_max_suppression_gpu() {
implementation_map<non_max_suppression>::add({
{std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), non_max_suppression_cpu::create},
{std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), non_max_suppression_cpu::create},
{std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), non_max_suppression_cpu::create}
attach_non_max_suppression_impl::attach_non_max_suppression_impl() {
implementation_map<non_max_suppression>::add(impl_types::cpu, non_max_suppression_impl::create, {
std::make_tuple(data_types::i32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::f32, format::bfyx),
});
}
} // namespace detail
} // namespace gpu
} // namespace cpu
} // namespace cldnn

View File

@ -4,10 +4,10 @@
#include "proposal_inst.h"
#include "cldnn/runtime/engine.hpp"
#include "implementation_map.h"
#include "impls/implementation_map.hpp"
#include "network_impl.h"
#include "cldnn/runtime/error_handler.hpp"
#include "register_gpu.hpp"
#include "register.hpp"
#include <algorithm>
#include <string>
@ -17,7 +17,7 @@
#define EPSILON 0.00001f
namespace cldnn {
namespace gpu {
namespace cpu {
namespace {
@ -190,13 +190,13 @@ struct im_info_t {
int min_bbox_y;
};
struct proposal_gpu : typed_primitive_impl<proposal> {
struct proposal_impl : typed_primitive_impl<proposal> {
const proposal_node& outer;
explicit proposal_gpu(const proposal_node& arg) : outer(arg) {}
explicit proposal_impl(const proposal_node& arg) : outer(arg) {}
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<proposal_gpu>(*this);
return make_unique<proposal_impl>(*this);
}
template <typename dtype>
@ -442,19 +442,19 @@ struct proposal_gpu : typed_primitive_impl<proposal> {
CLDNN_ERROR_MESSAGE(arg.id(), "image_info must have either 3, 4 or 6 items");
}
return new proposal_gpu(arg);
return new proposal_impl(arg);
}
};
namespace detail {
attach_proposal_gpu::attach_proposal_gpu() {
implementation_map<proposal>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
proposal_gpu::create);
implementation_map<proposal>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
proposal_gpu::create);
attach_proposal_impl::attach_proposal_impl() {
implementation_map<proposal>::add(impl_types::cpu, proposal_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx)
});
}
} // namespace detail
} // namespace gpu
} // namespace cpu
} // namespace cldnn

View File

@ -0,0 +1,20 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "register.hpp"
namespace cldnn {
namespace cpu {
#define REGISTER_CPU(prim) \
static detail::attach_##prim##_impl attach_##prim
void register_implementations() {
REGISTER_CPU(detection_output);
REGISTER_CPU(proposal);
REGISTER_CPU(non_max_suppression);
}
} // namespace cpu
} // namespace cldnn

View File

@ -0,0 +1,31 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "cldnn/primitives/detection_output.hpp"
#include "cldnn/primitives/proposal.hpp"
#include "cldnn/primitives/non_max_suppression.hpp"
namespace cldnn {
namespace cpu {
void register_implementations();
namespace detail {
#define REGISTER_CPU(prim) \
struct attach_##prim##_impl { \
attach_##prim##_impl(); \
}
REGISTER_CPU(proposal);
REGISTER_CPU(non_max_suppression);
REGISTER_CPU(detection_output);
#undef REGISTER_CPU
} // namespace detail
} // namespace cpu
} // namespace cldnn

View File

@ -0,0 +1,188 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <map>
#include <functional>
#include <typeinfo>
#include <tuple>
#include <string>
namespace cldnn {
template <typename T, typename U>
class singleton_map : public std::map<T, U> {
singleton_map() : std::map<T, U>() {}
singleton_map(singleton_map const&) = delete;
void operator=(singleton_map const&) = delete;
public:
static singleton_map& instance() {
static singleton_map instance_;
return instance_;
}
};
struct permute;
struct reorder;
struct custom_gpu_primitive;
struct generic_layer;
struct reshape;
struct data;
struct mutable_data;
struct input_layout;
struct prior_box;
struct loop;
struct primitive_impl;
template <class PType>
struct typed_program_node;
template <typename primitive_kind>
struct implementation_key {
typedef std::tuple<data_types, format::type> type;
type operator()(const typed_program_node<primitive_kind>& primitive) {
return std::make_tuple(primitive.get_dependency(0).get_output_layout().data_type,
primitive.get_dependency(0).get_output_layout().format);
}
type operator()(const layout& proposed_layout) {
return std::make_tuple(proposed_layout.data_type, proposed_layout.format);
}
};
template <>
struct implementation_key<permute> {
typedef int32_t type;
type operator()(const typed_program_node<permute>&) { return -1; }
type operator()(const layout&) { return -1; }
};
template <>
struct implementation_key<reorder> {
typedef int32_t type;
type operator()(const typed_program_node<reorder>&) { return -1; }
type operator()(const layout&) { return -1; }
};
template <>
struct implementation_key<generic_layer> {
typedef int32_t type;
type operator()(const typed_program_node<generic_layer>&) { return -1; }
type operator()(const layout&) { return -1; }
};
template <>
struct implementation_key<custom_gpu_primitive> {
typedef int32_t type;
type operator()(const typed_program_node<custom_gpu_primitive>&) { return -1; }
type operator()(const layout&) { return -1; }
};
template <>
struct implementation_key<reshape> {
typedef int32_t type;
type operator()(const typed_program_node<reshape>&) { return -1; }
type operator()(const layout&) { return -1; }
};
template <>
struct implementation_key<data> {
typedef int32_t type;
type operator()(const typed_program_node<data>&) { return -1; }
type operator()(const layout&) { return -1; }
};
template <>
struct implementation_key<mutable_data> {
typedef int32_t type;
type operator()(const typed_program_node<mutable_data>&) { return -1; }
type operator()(const layout&) { return -1; }
};
template <>
struct implementation_key<input_layout> {
typedef int32_t type;
type operator()(const typed_program_node<input_layout>&) { return -1; }
type operator()(const layout&) { return -1; }
};
template <>
struct implementation_key<prior_box> {
typedef int32_t type;
type operator()(const typed_program_node<prior_box>&) { return -1; }
type operator()(const layout&) { return -1; }
};
template <>
struct implementation_key<loop> {
typedef int32_t type;
type operator()(const typed_program_node<loop>&) { return -1; }
type operator()(const layout&) { return -1; }
};
template <typename primitive_kind>
class implementation_map {
public:
using key_builder = implementation_key<primitive_kind>;
using key_type = typename key_builder::type;
using factory_type = std::function<primitive_impl*(const typed_program_node<primitive_kind>&)>;
using map_type = singleton_map<impl_types, std::pair<std::set<key_type>, factory_type>>;
static factory_type get(const typed_program_node<primitive_kind>& primitive) {
impl_types target_impl_type = primitive.get_preferred_impl_type();
// lookup in database; throw if not found
auto key = key_builder()(primitive);
for (auto& kv : map_type::instance()) {
impl_types impl_type = kv.first;
if ((target_impl_type & impl_type) != impl_type)
continue;
std::set<key_type>& keys_set = kv.second.first;
auto& factory = kv.second.second;
if (keys_set.empty() || keys_set.find(key) != keys_set.end()) {
return factory;
}
}
throw std::runtime_error(std::string("implementation_map for ") + typeid(primitive_kind).name() +
" could not find any implementation to match key");
}
// check if an implementation exists for a given primitive node (data type / format / preferred impl type)
static bool check(const typed_program_node<primitive_kind>& primitive) {
impl_types target_impl_type = primitive.get_preferred_impl_type();
auto key = key_builder()(primitive);
return check_key(target_impl_type, key);
}
// check if there exists a kernel implementation of a primitive with output set to the primitive's output layout
static bool check_io_eq(const typed_program_node<primitive_kind>& primitive) {
impl_types target_impl_type = primitive.get_preferred_impl_type();
auto key = key_builder()(primitive.get_output_layout());
return check_key(target_impl_type, key);
}
static bool check_key(impl_types target_impl_type, key_type key) {
for (auto& kv : map_type::instance()) {
impl_types impl_type = kv.first;
if ((target_impl_type & impl_type) != impl_type)
continue;
std::set<key_type>& keys_set = kv.second.first;
if (keys_set.empty())
return true;
return keys_set.find(key) != keys_set.end();
}
return false;
}
static void add(impl_types impl_type, factory_type factory, std::set<key_type> keys) {
if (impl_type == impl_types::any) {
throw std::runtime_error("[CLDNN] Can't register impl with type any");
}
map_type::instance().insert({impl_type, {keys, factory}});
}
};
} // namespace cldnn
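Lookup in the new implementation_map is two-dimensional: an impl_types bucket and a (data type, format) key built by implementation_key from the node's first dependency (or from the output layout in check_io_eq); primitives such as permute, reorder or data collapse the key to a constant, so any layout matches. get() walks the buckets, skips those rejected by the node's preferred implementation type via the (target & impl_type) != impl_type test, and returns the first factory whose key set is empty or contains the key. A hedged usage sketch, assuming a hypothetical primitive my_prim with one OCL and one CPU factory already defined (none of these names come from this commit):

// registration, normally done once from the attach_*_impl / attach_*_common constructors
implementation_map<my_prim>::add(impl_types::ocl, my_ocl_impl::create, {
    std::make_tuple(data_types::f32, format::bfyx),
});
implementation_map<my_prim>::add(impl_types::cpu, my_cpu_impl::create, {
    std::make_tuple(data_types::f32, format::bfyx),
});

// selection for a node: the node's preferred impl type narrows which buckets are considered
primitive_impl* select_impl(const typed_program_node<my_prim>& node) {
    if (!implementation_map<my_prim>::check(node))
        throw std::runtime_error("no implementation registered for this data type / format");
    auto factory = implementation_map<my_prim>::get(node);
    return factory(node);  // constructs the matching impl via its create()
}

Registering under impl_types::any is rejected (add() throws), so every implementation has to commit to a concrete bucket.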

View File

@ -0,0 +1,123 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "activation_inst.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "activation/activation_kernel_selector.h"
#include "activation/activation_kernel_base.h"
namespace cldnn {
namespace ocl {
struct activation_impl : typed_primitive_impl_ocl<activation> {
using parent = typed_primitive_impl_ocl<activation>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<activation_impl>(*this);
}
kernel_arguments_data get_arguments(typed_primitive_inst<activation>& instance, int32_t split) const override {
kernel_arguments_data args = parent::get_arguments(instance, split);
if (_outer.is_parameterized()) {
args.slope = instance.slope_memory();
}
return args;
}
static primitive_impl* create(const activation_node& arg) {
auto activation_params = get_default_params<kernel_selector::activation_params>(arg);
auto activation_optional_params =
get_default_optional_params<kernel_selector::activation_optional_params>(arg.get_program());
convert_new_activation_func(arg.get_primitive(), activation_params.activations);
if (arg.is_parameterized()) {
const auto& slope_layout = arg.slope_input().get_output_layout();
const auto& output_layout = arg.get_output_layout();
const auto params_num =
kernel_selector::GetActivationAdditionalParamsNumber(activation_params.activations[0].function);
CLDNN_ERROR_LESS_THAN(arg.id(),
"Slope layout size count",
slope_layout.size.count(),
"output_layout.size.feature[0] * params_num",
static_cast<size_t>(output_layout.size.feature[0] * params_num),
"Error - not enough data inside additional params buffer");
activation_params.inputActivationParams.push_back(convert_data_tensor(slope_layout));
}
auto& kernel_selector = kernel_selector::activation_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(activation_params, activation_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto activation = new activation_impl(arg, best_kernels[0]);
return activation;
}
};
namespace detail {
attach_activation_impl::attach_activation_impl() {
implementation_map<activation>::add(impl_types::ocl, activation_impl::create, {
std::make_tuple(data_types::f32, format::yxfb),
std::make_tuple(data_types::f16, format::yxfb),
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::f32, format::byxf),
std::make_tuple(data_types::f16, format::byxf),
std::make_tuple(data_types::i8, format::yxfb),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::i8, format::byxf),
std::make_tuple(data_types::u8, format::yxfb),
std::make_tuple(data_types::u8, format::bfyx),
std::make_tuple(data_types::u8, format::byxf),
std::make_tuple(data_types::i32, format::bfyx),
std::make_tuple(data_types::i32, format::byxf),
std::make_tuple(data_types::i32, format::yxfb),
// block f16 format
std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
// 3D
std::make_tuple(data_types::f32, format::bfzyx),
std::make_tuple(data_types::f16, format::bfzyx),
std::make_tuple(data_types::i8, format::bfzyx),
std::make_tuple(data_types::i32, format::bfzyx),
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv16_fsv16),
// bfwzyx
std::make_tuple(data_types::f32, format::bfwzyx),
std::make_tuple(data_types::f16, format::bfwzyx),
std::make_tuple(data_types::i32, format::bfwzyx),
std::make_tuple(data_types::i8, format::bfwzyx),
std::make_tuple(data_types::u8, format::bfwzyx),
// fs_b_yx_fsv32
std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),
});
}
} // namespace detail
} // namespace ocl
} // namespace cldnn

View File

@ -3,8 +3,8 @@
//
#include "arg_max_min_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "arg_max_min/arg_max_min_kernel_selector.h"
@ -12,14 +12,14 @@
#include "kernel_runner.h"
namespace cldnn {
namespace gpu {
namespace ocl {
struct arg_max_min_gpu : typed_primitive_gpu_impl<arg_max_min> {
using parent = typed_primitive_gpu_impl<arg_max_min>;
struct arg_max_min_impl : typed_primitive_impl_ocl<arg_max_min> {
using parent = typed_primitive_impl_ocl<arg_max_min>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<arg_max_min_gpu>(*this);
return make_unique<arg_max_min_impl>(*this);
}
protected:
@ -98,37 +98,27 @@ public:
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto conv = new arg_max_min_gpu(arg, best_kernels[0]);
auto conv = new arg_max_min_impl(arg, best_kernels[0]);
return conv;
}
};
namespace detail {
attach_arg_max_min_gpu::attach_arg_max_min_gpu() {
implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
arg_max_min_gpu::create);
implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
arg_max_min_gpu::create);
implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx),
arg_max_min_gpu::create);
implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
arg_max_min_gpu::create);
implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx),
arg_max_min_gpu::create);
implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx),
arg_max_min_gpu::create);
implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx),
arg_max_min_gpu::create);
implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
arg_max_min_gpu::create);
implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
arg_max_min_gpu::create);
implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb),
arg_max_min_gpu::create);
}
attach_arg_max_min_impl::attach_arg_max_min_impl() {
implementation_map<arg_max_min>::add(impl_types::ocl, arg_max_min_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::i32, format::bfyx),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::f32, format::bfzyx),
std::make_tuple(data_types::f16, format::bfzyx),
std::make_tuple(data_types::i8, format::bfzyx),
std::make_tuple(data_types::f32, format::yxfb),
std::make_tuple(data_types::f16, format::yxfb),
std::make_tuple(data_types::i8, format::yxfb),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn

View File

@ -0,0 +1,79 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "average_unpooling_inst.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "average_unpooling/average_unpooling_kernel_selector.h"
#include "average_unpooling/average_unpooling_kernel_base.h"
namespace cldnn {
namespace ocl {
struct average_unpooling_impl : typed_primitive_impl_ocl<average_unpooling> {
using parent = typed_primitive_impl_ocl<average_unpooling>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<average_unpooling_impl>(*this);
}
protected:
kernel_arguments_data get_arguments(typed_primitive_inst<average_unpooling>& instance, int32_t split) const override {
kernel_arguments_data args = parent::get_arguments(instance, split);
return args;
}
public:
static primitive_impl* create(const average_unpooling_node& arg) {
auto average_unpooling_params = get_default_params<kernel_selector::average_unpooling_params>(arg);
auto average_unpooling_optional_params =
get_default_optional_params<kernel_selector::average_unpooling_optional_params>(arg.get_program());
auto& params = average_unpooling_params;
auto primitive = arg.get_primitive();
auto stride = primitive->stride;
params.unpoolSize = {
(uint32_t)primitive->size.spatial[0],
(uint32_t)primitive->size.spatial[1],
};
params.unpoolStride = {(uint32_t)stride.spatial[0], (uint32_t)stride.spatial[1]};
auto& kernel_selector = kernel_selector::average_unpooling_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(average_unpooling_params, average_unpooling_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto average_unpool = new average_unpooling_impl(arg, best_kernels[0]);
return average_unpool;
}
};
namespace detail {
attach_average_unpooling_impl::attach_average_unpooling_impl() {
implementation_map<average_unpooling>::add(impl_types::ocl, average_unpooling_impl::create, {
std::make_tuple(data_types::f32, format::yxfb),
std::make_tuple(data_types::f16, format::yxfb),
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::i8, format::yxfb),
std::make_tuple(data_types::f32, format::byxf),
std::make_tuple(data_types::f16, format::byxf),
std::make_tuple(data_types::i8, format::byxf),
});
}
} // namespace detail
} // namespace ocl
} // namespace cldnn

View File

@ -0,0 +1,78 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "batch_to_space_inst.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "batch_to_space/batch_to_space_kernel_selector.h"
#include "batch_to_space/batch_to_space_kernel_ref.h"
#include "cldnn/runtime/error_handler.hpp"
#include "data_inst.h"
#include <vector>
using namespace cldnn;
namespace cldnn {
namespace ocl {
struct batch_to_space_impl : typed_primitive_impl_ocl<batch_to_space> {
using parent = typed_primitive_impl_ocl<batch_to_space>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<batch_to_space_impl>(*this);
}
public:
static primitive_impl* create(const batch_to_space_node& arg) {
auto batch_to_space_params = get_default_params<kernel_selector::batch_to_space_params>(arg);
auto batch_to_space_optional_params =
get_default_optional_params<kernel_selector::batch_to_space_optional_params>(arg.get_program());
auto primitive = arg.get_primitive();
batch_to_space_params.block_shape = convert_dim_vector(primitive->block_shape);
batch_to_space_params.crops_begin = convert_dim_vector(primitive->crops_begin);
batch_to_space_params.crops_end = convert_dim_vector(primitive->crops_end);
auto& kernel_selector = kernel_selector::batch_to_space_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(batch_to_space_params, batch_to_space_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto batch_to_space = new batch_to_space_impl(arg, best_kernels[0]);
return batch_to_space;
}
};
namespace detail {
attach_batch_to_space_impl::attach_batch_to_space_impl() {
implementation_map<batch_to_space>::add(impl_types::ocl, batch_to_space_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::u8, format::bfyx),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::f32, format::bfzyx),
std::make_tuple(data_types::f16, format::bfzyx),
std::make_tuple(data_types::u8, format::bfzyx),
std::make_tuple(data_types::i8, format::bfzyx),
std::make_tuple(data_types::f32, format::bfwzyx),
std::make_tuple(data_types::f16, format::bfwzyx),
std::make_tuple(data_types::u8, format::bfwzyx),
std::make_tuple(data_types::i8, format::bfwzyx),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
});
}
} // namespace detail
} // namespace ocl
} // namespace cldnn

View File

@ -5,8 +5,8 @@
#include "cldnn/primitives/scale.hpp"
#include "cldnn/primitives/quantize.hpp"
#include "binary_convolution_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "kernel_runner.h"
@ -16,14 +16,14 @@
#include <memory>
namespace cldnn {
namespace gpu {
namespace ocl {
struct binary_convolution_gpu : typed_primitive_gpu_impl<binary_convolution> {
using parent = typed_primitive_gpu_impl<binary_convolution>;
struct binary_convolution_impl : typed_primitive_impl_ocl<binary_convolution> {
using parent = typed_primitive_impl_ocl<binary_convolution>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<binary_convolution_gpu>(*this);
return make_unique<binary_convolution_impl>(*this);
}
protected:
@ -125,7 +125,7 @@ public:
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto conv = new binary_convolution_gpu(arg, best_kernels[0]);
auto conv = new binary_convolution_impl(arg, best_kernels[0]);
return conv;
}
@ -133,12 +133,12 @@ public:
namespace detail {
attach_binary_convolution_gpu::attach_binary_convolution_gpu() {
implementation_map<binary_convolution>::add(
std::make_tuple(engine_types::ocl, data_types::bin, format::b_fs_yx_32fp),
binary_convolution_gpu::create);
attach_binary_convolution_impl::attach_binary_convolution_impl() {
implementation_map<binary_convolution>::add(impl_types::ocl, binary_convolution_impl::create, {
std::make_tuple(data_types::bin, format::b_fs_yx_32fp),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn

View File

@ -0,0 +1,96 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "border_inst.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "border/border_kernel_selector.h"
#include "border/border_kernel_base.h"
#include "cldnn/runtime/error_handler.hpp"
namespace cldnn {
namespace ocl {
struct border_impl : typed_primitive_impl_ocl<border> {
using parent = typed_primitive_impl_ocl<border>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<border_impl>(*this);
}
static primitive_impl* create(const border_node& arg) {
auto b_params = get_default_params<kernel_selector::border_params>(arg, 1);
auto b_optional_params =
get_default_optional_params<kernel_selector::border_optional_params>(arg.get_program());
auto desc = arg.get_primitive();
b_params.lt_sizes = convert_dim_vector(desc->left_top_sizes);
b_params.rb_sizes = convert_dim_vector(desc->right_bottom_sizes);
b_params.border_value = desc->border_value;
switch (desc->type) {
case border_type::constant:
b_params.b_type = kernel_selector::border_type::CONSTANT;
break;
case border_type::edge:
b_params.b_type = kernel_selector::border_type::EDGE;
break;
case border_type::mirror:
b_params.b_type = kernel_selector::border_type::MIRROR;
break;
case border_type::mirror_101:
b_params.b_type = kernel_selector::border_type::MIRROR_101;
break;
default:
assert(
false &&
"Encountered unhandled enum case: border_type during translation to kernel selector enumeration.");
}
auto& kernel_selector = kernel_selector::border_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(b_params, b_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
return new border_impl(arg, best_kernels[0]);
}
};
namespace detail {
attach_border_impl::attach_border_impl() {
implementation_map<border>::add(impl_types::ocl, border_impl::create, {
std::make_tuple(data_types::f32, format::yxfb),
std::make_tuple(data_types::f16, format::yxfb),
std::make_tuple(data_types::i8, format::yxfb),
std::make_tuple(data_types::u8, format::yxfb),
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::u8, format::bfyx),
std::make_tuple(data_types::f32, format::byxf),
std::make_tuple(data_types::f16, format::byxf),
std::make_tuple(data_types::i8, format::byxf),
std::make_tuple(data_types::u8, format::byxf),
std::make_tuple(data_types::f32, format::bfzyx),
std::make_tuple(data_types::f16, format::bfzyx),
std::make_tuple(data_types::i8, format::bfzyx),
std::make_tuple(data_types::u8, format::bfzyx),
std::make_tuple(data_types::f32, format::bfwzyx),
std::make_tuple(data_types::f16, format::bfwzyx),
std::make_tuple(data_types::i8, format::bfwzyx),
std::make_tuple(data_types::u8, format::bfwzyx),
});
}
} // namespace detail
} // namespace ocl
} // namespace cldnn

View File

@ -4,22 +4,22 @@
#include "broadcast_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "broadcast/broadcast_kernel_selector.h"
#include "broadcast/broadcast_kernel_base.h"
#include "cldnn/runtime/error_handler.hpp"
namespace cldnn {
namespace gpu {
namespace ocl {
struct broadcast_gpu : typed_primitive_gpu_impl<broadcast> {
using parent = typed_primitive_gpu_impl<broadcast>;
struct broadcast_impl : typed_primitive_impl_ocl<broadcast> {
using parent = typed_primitive_impl_ocl<broadcast>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<broadcast_gpu>(*this);
return make_unique<broadcast_impl>(*this);
}
static primitive_impl* create(const broadcast_node& arg) {
@ -57,29 +57,29 @@ struct broadcast_gpu : typed_primitive_gpu_impl<broadcast> {
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
return new broadcast_gpu(arg, best_kernels[0]);
return new broadcast_impl(arg, best_kernels[0]);
}
};
namespace detail {
attach_broadcast_gpu::attach_broadcast_gpu() {
auto val_fw = broadcast_gpu::create;
implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), val_fw);
implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), val_fw);
attach_broadcast_impl::attach_broadcast_impl() {
implementation_map<broadcast>::add(impl_types::ocl, broadcast_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::u8, format::bfyx),
std::make_tuple(data_types::i32, format::bfyx),
std::make_tuple(data_types::i64, format::bfyx),
std::make_tuple(data_types::f32, format::bfzyx),
std::make_tuple(data_types::f16, format::bfzyx),
std::make_tuple(data_types::i8, format::bfzyx),
std::make_tuple(data_types::u8, format::bfzyx),
std::make_tuple(data_types::i32, format::bfzyx),
std::make_tuple(data_types::i64, format::bfzyx),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn

View File

@ -0,0 +1,159 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "concatenation_inst.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "concatenation/concatenation_kernel_selector.h"
#include "concatenation/concatenation_kernel_base.h"
#include <initializer_list>
namespace cldnn {
namespace ocl {
namespace {
kernel_selector::concat_axis convert_axis(concatenation::concatenation_axis axis) {
switch (axis) {
case concatenation::along_x:
return kernel_selector::concat_axis::X;
case concatenation::along_y:
return kernel_selector::concat_axis::Y;
case concatenation::along_z:
return kernel_selector::concat_axis::Z;
case concatenation::along_w:
return kernel_selector::concat_axis::W;
case concatenation::along_f:
return kernel_selector::concat_axis::FEATURE;
case concatenation::along_b:
return kernel_selector::concat_axis::BATCH;
default:
return kernel_selector::concat_axis::X;
}
}
} // namespace
struct concatenation_impl : typed_primitive_impl_ocl<concatenation> {
using parent = typed_primitive_impl_ocl<concatenation>;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<concatenation_impl>(*this);
}
concatenation_impl(const concatenation_node& arg, const kernel_selector::kernel_data& kd) : parent(arg, kd) {
if (!_outer.can_be_optimized()) {
CLDNN_ERROR_NOT_EQUAL(_outer.id(),
"Input count",
_outer.inputs_count(),
"kds size",
kd.kernels.size(),
"Error - not enough kernels for concatenation");
}
}
protected:
bool optimized_out(concatenation_inst& instance) const override {
return parent::optimized_out(instance) || _outer.can_be_optimized();
}
public:
static primitive_impl* create(const concatenation_node& arg) {
if (arg.can_be_optimized()) {
return new concatenation_impl(arg, {});
}
auto concat_params = get_default_params<kernel_selector::concatenation_params>(arg);
auto concat_optional_params =
get_default_optional_params<kernel_selector::concatenation_optional_params>(arg.get_program());
auto axis = arg.get_primitive()->axis;
concat_params.inputs.resize(arg.inputs_count());
for (size_t i = 0; i < arg.inputs_count(); ++i) {
const layout& input_layout = arg.input(i).get_output_layout();
concat_params.inputs[i] = convert_data_tensor(input_layout);
}
concat_params.axis = convert_axis(axis);
concat_optional_params.kernelPerInput = true;
auto& kernel_selector = kernel_selector::concatenation_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(concat_params, concat_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
concatenation_impl* concat = new concatenation_impl(arg, best_kernels[0]);
return concat;
}
};
namespace detail {
attach_concatenation_impl::attach_concatenation_impl() {
implementation_map<concatenation>::add(impl_types::ocl, concatenation_impl::create, {
std::make_tuple(data_types::f32, format::yxfb),
std::make_tuple(data_types::f16, format::yxfb),
std::make_tuple(data_types::i8, format::yxfb),
std::make_tuple(data_types::u8, format::yxfb),
std::make_tuple(data_types::i32, format::yxfb),
std::make_tuple(data_types::i64, format::yxfb),
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::u8, format::bfyx),
std::make_tuple(data_types::i32, format::bfyx),
std::make_tuple(data_types::i64, format::bfyx),
std::make_tuple(data_types::f32, format::byxf),
std::make_tuple(data_types::f16, format::byxf),
std::make_tuple(data_types::i8, format::byxf),
std::make_tuple(data_types::u8, format::byxf),
std::make_tuple(data_types::i32, format::byxf),
std::make_tuple(data_types::i64, format::byxf),
std::make_tuple(data_types::f32, format::fyxb),
std::make_tuple(data_types::f16, format::fyxb),
std::make_tuple(data_types::f32, format::bfzyx),
std::make_tuple(data_types::f16, format::bfzyx),
std::make_tuple(data_types::i8, format::bfzyx),
std::make_tuple(data_types::u8, format::bfzyx),
std::make_tuple(data_types::i32, format::bfzyx),
std::make_tuple(data_types::i64, format::bfzyx),
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::i32, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::i64, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::u8, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::i32, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::i64, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
std::make_tuple(data_types::f32, format::bfwzyx),
std::make_tuple(data_types::f16, format::bfwzyx),
std::make_tuple(data_types::u8, format::bfwzyx),
std::make_tuple(data_types::i8, format::bfwzyx),
std::make_tuple(data_types::i32, format::bfwzyx),
std::make_tuple(data_types::i64, format::bfwzyx),
std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),
});
}
} // namespace detail
} // namespace ocl
} // namespace cldnn

View File

@ -4,8 +4,8 @@
#include "convolution_inst.h"
#include "eltwise_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "kernel_runner.h"
@ -15,14 +15,14 @@
#include <memory>
namespace cldnn {
namespace gpu {
namespace ocl {
struct convolution_gpu : typed_primitive_gpu_impl<convolution> {
using parent = typed_primitive_gpu_impl<convolution>;
struct convolution_impl : typed_primitive_impl_ocl<convolution> {
using parent = typed_primitive_impl_ocl<convolution>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<convolution_gpu>(*this);
return make_unique<convolution_impl>(*this);
}
protected:
@ -151,7 +151,7 @@ public:
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with these arguments");
auto conv = new convolution_gpu(arg, best_kernels[0]);
auto conv = new convolution_impl(arg, best_kernels[0]);
return conv;
}
@ -159,55 +159,49 @@ public:
namespace detail {
attach_convolution_gpu::attach_convolution_gpu() {
auto val_fw = convolution_gpu::create;
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::winograd_2x3_s1_data), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::winograd_2x3_s1_data), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
// block f16 format
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
// block i8 format
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), val_fw);
// MMAD
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv32), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv32), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv32), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv32), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_yx_bsv16_fsv16), val_fw);
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16), val_fw);
attach_convolution_impl::attach_convolution_impl() {
implementation_map<convolution>::add(impl_types::ocl, convolution_impl::create, {
std::make_tuple(data_types::f32, format::yxfb),
std::make_tuple(data_types::f16, format::yxfb),
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::u8, format::bfyx),
std::make_tuple(data_types::f32, format::bfzyx),
std::make_tuple(data_types::f16, format::bfzyx),
std::make_tuple(data_types::i8, format::bfzyx),
std::make_tuple(data_types::u8, format::bfzyx),
std::make_tuple(data_types::f32, format::winograd_2x3_s1_data),
std::make_tuple(data_types::f16, format::winograd_2x3_s1_data),
std::make_tuple(data_types::f32, format::byxf),
std::make_tuple(data_types::f16, format::byxf),
std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
std::make_tuple(data_types::u8, format::b_fs_zyx_fsv32),
std::make_tuple(data_types::i8, format::b_fs_zyx_fsv32),
std::make_tuple(data_types::i8, format::byxf),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv16_fsv16),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn
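Many of the formats listed above (b_fs_yx_fsv16, b_fs_zyx_fsv16, bs_fs_yx_bsv16_fsv16, ...) are blocked layouts. As a rough orientation, the sketch below assumes the common convention where the feature axis is split into blocks of 16 with the remainder stored innermost; real clDNN tensors may additionally account for padding, so this is illustrative only.

#include <cstddef>
#include <iostream>

// Linear offset of element (b, f, y, x) in a b_fs_yx_fsv16-style buffer (no padding assumed).
std::size_t offset_b_fs_yx_fsv16(std::size_t b, std::size_t f, std::size_t y, std::size_t x,
                                 std::size_t F, std::size_t Y, std::size_t X) {
    const std::size_t fsv = 16;
    const std::size_t f_blocks = (F + fsv - 1) / fsv;  // feature axis rounded up to whole blocks
    const std::size_t fb = f / fsv;                    // which block of 16 features
    const std::size_t fi = f % fsv;                    // position inside the block (innermost)
    return (((b * f_blocks + fb) * Y + y) * X + x) * fsv + fi;
}

int main() {
    // Element (b=0, f=17, y=0, x=0) of a 1x32x4x4 tensor lands in the second feature block.
    std::cout << offset_b_fs_yx_fsv16(0, 17, 0, 0, 32, 4, 4) << "\n";  // 4*4*16 + 1 = 257
}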

View File

@ -0,0 +1,118 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "crop_inst.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "eltwise/eltwise_kernel_selector.h"
#include "eltwise/eltwise_kernel_base.h"
#include "cldnn/runtime/error_handler.hpp"
namespace cldnn {
namespace ocl {
struct crop_impl : typed_primitive_impl_ocl<crop> {
using parent = typed_primitive_impl_ocl<crop>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<crop_impl>(*this);
}
protected:
bool optimized_out(crop_inst& instance) const override {
return parent::optimized_out(instance) || _outer.can_be_optimized();
}
public:
static primitive_impl* create(const crop_node& arg) {
auto ew_params = get_default_params<kernel_selector::eltwise_params>(arg, 1);
auto ew_optional_params =
get_default_optional_params<kernel_selector::eltwise_optional_params>(arg.get_program());
ew_params.operations.push_back(
{{kernel_selector::eltwise_params::InputType::Buffer(0)}, kernel_selector::eltwise_mode::ASSIGN});
const auto& input_layout = arg.input().get_output_layout();
ew_params.inputs[0] = convert_data_tensor(input_layout, 1, arg.get_primitive()->offsets);
auto& kernel_selector = kernel_selector::eltwise_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(ew_params, ew_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto crop = new crop_impl(arg, best_kernels[0]);
return crop;
}
};
namespace detail {
attach_crop_impl::attach_crop_impl() {
implementation_map<crop>::add(impl_types::ocl, crop_impl::create, {
std::make_tuple(data_types::f32, format::yxfb),
std::make_tuple(data_types::f16, format::yxfb),
std::make_tuple(data_types::i64, format::yxfb),
std::make_tuple(data_types::i32, format::yxfb),
std::make_tuple(data_types::i8, format::yxfb),
std::make_tuple(data_types::u8, format::yxfb),
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::i64, format::bfyx),
std::make_tuple(data_types::i32, format::bfyx),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::u8, format::bfyx),
std::make_tuple(data_types::f32, format::byxf),
std::make_tuple(data_types::f16, format::byxf),
std::make_tuple(data_types::i64, format::byxf),
std::make_tuple(data_types::i32, format::byxf),
std::make_tuple(data_types::i8, format::byxf),
std::make_tuple(data_types::u8, format::byxf),
std::make_tuple(data_types::f32, format::fyxb),
std::make_tuple(data_types::f16, format::fyxb),
std::make_tuple(data_types::i64, format::fyxb),
std::make_tuple(data_types::i32, format::fyxb),
std::make_tuple(data_types::i8, format::fyxb),
std::make_tuple(data_types::u8, format::fyxb),
std::make_tuple(data_types::f32, format::bfzyx),
std::make_tuple(data_types::f16, format::bfzyx),
std::make_tuple(data_types::i64, format::bfzyx),
std::make_tuple(data_types::i32, format::bfzyx),
std::make_tuple(data_types::i8, format::bfzyx),
std::make_tuple(data_types::u8, format::bfzyx),
std::make_tuple(data_types::f32, format::bfwzyx),
std::make_tuple(data_types::f16, format::bfwzyx),
std::make_tuple(data_types::i64, format::bfwzyx),
std::make_tuple(data_types::i32, format::bfwzyx),
std::make_tuple(data_types::i8, format::bfwzyx),
std::make_tuple(data_types::u8, format::bfwzyx),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::i64, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::i32, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::i64, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::i32, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::u8, format::bs_fs_zyx_bsv16_fsv16),
});
}
} // namespace detail
} // namespace ocl
} // namespace cldnn
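Note that crop has no dedicated kernel: create() above reuses the eltwise selector with a single ASSIGN operation over an input tensor shifted by the crop offsets. A plain C++ sketch of that idea — an offset copy over a 2D window, not the GPU kernel — is shown below.

#include <cstddef>
#include <iostream>
#include <vector>

// Copy an h_out x w_out window starting at (off_y, off_x) out of a row-major h_in x w_in buffer.
std::vector<float> crop2d(const std::vector<float>& in, std::size_t w_in,
                          std::size_t off_y, std::size_t off_x,
                          std::size_t h_out, std::size_t w_out) {
    std::vector<float> out(h_out * w_out);
    for (std::size_t y = 0; y < h_out; ++y)
        for (std::size_t x = 0; x < w_out; ++x)
            out[y * w_out + x] = in[(y + off_y) * w_in + (x + off_x)];  // plain ASSIGN, nothing else
    return out;
}

int main() {
    std::vector<float> img = {0, 1, 2,
                              3, 4, 5,
                              6, 7, 8};         // 3x3
    for (float v : crop2d(img, 3, 1, 1, 2, 2))  // 2x2 window at offset (1, 1)
        std::cout << v << " ";                  // 4 5 7 8
}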

View File

@ -3,8 +3,8 @@
//
#include "ctc_greedy_decoder_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "ctc_greedy_decoder/ctc_greedy_decoder_kernel_selector.h"
@ -15,14 +15,14 @@
using namespace cldnn;
namespace cldnn {
namespace gpu {
namespace ocl {
struct ctc_greedy_decoder_gpu : typed_primitive_gpu_impl<ctc_greedy_decoder> {
using parent = typed_primitive_gpu_impl<ctc_greedy_decoder>;
struct ctc_greedy_decoder_impl : typed_primitive_impl_ocl<ctc_greedy_decoder> {
using parent = typed_primitive_impl_ocl<ctc_greedy_decoder>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<ctc_greedy_decoder_gpu>(*this);
return make_unique<ctc_greedy_decoder_impl>(*this);
}
public:
@ -51,7 +51,7 @@ public:
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto grn = new ctc_greedy_decoder_gpu(arg, best_kernels[0]);
auto grn = new ctc_greedy_decoder_impl(arg, best_kernels[0]);
return grn;
}
@ -59,13 +59,15 @@ public:
namespace detail {
attach_ctc_greedy_decoder_gpu::attach_ctc_greedy_decoder_gpu() {
implementation_map<ctc_greedy_decoder>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), ctc_greedy_decoder_gpu::create);
implementation_map<ctc_greedy_decoder>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), ctc_greedy_decoder_gpu::create);
implementation_map<ctc_greedy_decoder>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), ctc_greedy_decoder_gpu::create);
implementation_map<ctc_greedy_decoder>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), ctc_greedy_decoder_gpu::create);
attach_ctc_greedy_decoder_impl::attach_ctc_greedy_decoder_impl() {
implementation_map<ctc_greedy_decoder>::add(impl_types::ocl, ctc_greedy_decoder_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::i32, format::bfyx),
std::make_tuple(data_types::i64, format::bfyx),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn

View File

@ -3,8 +3,8 @@
//
#include "cum_sum_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "cum_sum/cum_sum_kernel_selector.h"
#include "cum_sum/cum_sum_kernel_ref.h"
@ -13,7 +13,7 @@
using namespace cldnn;
namespace cldnn {
namespace gpu {
namespace ocl {
namespace {
kernel_selector::cum_sum_axis convert_axis(cum_sum::cum_sum_axis axis) {
@ -36,12 +36,12 @@ kernel_selector::cum_sum_axis convert_axis(cum_sum::cum_sum_axis axis) {
}
} // namespace
struct cum_sum_gpu : typed_primitive_gpu_impl<cum_sum> {
using parent = typed_primitive_gpu_impl<cum_sum>;
struct cum_sum_impl : typed_primitive_impl_ocl<cum_sum> {
using parent = typed_primitive_impl_ocl<cum_sum>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<cum_sum_gpu>(*this);
return make_unique<cum_sum_impl>(*this);
}
public:
@ -62,7 +62,7 @@ public:
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto cum_sum = new cum_sum_gpu(arg, best_kernels[0]);
auto cum_sum = new cum_sum_impl(arg, best_kernels[0]);
return cum_sum;
}
@ -70,16 +70,17 @@ public:
namespace detail {
attach_cum_sum_gpu::attach_cum_sum_gpu() {
auto val_fw = cum_sum_gpu::create;
implementation_map<cum_sum>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
implementation_map<cum_sum>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
implementation_map<cum_sum>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
implementation_map<cum_sum>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
implementation_map<cum_sum>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
implementation_map<cum_sum>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
attach_cum_sum_impl::attach_cum_sum_impl() {
implementation_map<cum_sum>::add(impl_types::ocl, cum_sum_impl::create, {
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::f16, format::bfzyx),
std::make_tuple(data_types::f16, format::bfwzyx),
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f32, format::bfzyx),
std::make_tuple(data_types::f32, format::bfwzyx),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn
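For orientation, the cum_sum primitive registered above computes an inclusive cumulative sum along the chosen axis; a one-dimensional sketch in plain C++ (not the GPU kernel) is given below.

#include <iostream>
#include <numeric>
#include <vector>

int main() {
    std::vector<float> in = {1, 2, 3, 4};
    std::vector<float> out(in.size());
    std::partial_sum(in.begin(), in.end(), out.begin());  // inclusive cumulative sum
    for (float v : out)
        std::cout << v << " ";                            // 1 3 6 10
}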

View File

@ -4,12 +4,12 @@
#include "custom_gpu_primitive_inst.h"
#include "cldnn/runtime/engine.hpp"
#include "implementation_map.h"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "network_impl.h"
#include "jitter.h"
#include "cldnn/runtime/error_handler.hpp"
#include "register_gpu.hpp"
#include "register.hpp"
#include <map>
#include <sstream>
@ -17,24 +17,24 @@
#include <memory>
#include <string>
using namespace cldnn;
namespace kernel_selector {
using jit_constants = kernel_selector::JitConstants;
}
namespace neural {
namespace cldnn {
namespace ocl {
struct custom_gpu_primitive_gpu : typed_primitive_impl<custom_gpu_primitive> {
struct custom_gpu_primitive_impl : typed_primitive_impl<custom_gpu_primitive> {
const custom_gpu_primitive_node& outer;
std::shared_ptr<kernel_selector::cl_kernel_data> cl_kernel;
std::vector<kernel::ptr> _kernels;
kernel_id _kernel_id;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<custom_gpu_primitive_gpu>(*this);
return make_unique<custom_gpu_primitive_impl>(*this);
}
custom_gpu_primitive_gpu(const custom_gpu_primitive_gpu& other)
custom_gpu_primitive_impl(const custom_gpu_primitive_impl& other)
: outer(other.outer)
, cl_kernel(other.cl_kernel)
, _kernels({})
@ -42,7 +42,7 @@ struct custom_gpu_primitive_gpu : typed_primitive_impl<custom_gpu_primitive> {
_kernels.emplace_back(std::move(outer.get_program().get_kernel(_kernel_id)->clone()));
}
custom_gpu_primitive_gpu(const custom_gpu_primitive_node& arg,
custom_gpu_primitive_impl(const custom_gpu_primitive_node& arg,
std::shared_ptr<kernel_selector::cl_kernel_data>& cl_kernel)
: outer(arg)
, cl_kernel(cl_kernel)
@ -224,14 +224,15 @@ static primitive_impl* create(const custom_gpu_primitive_node& arg) {
cl_kernel->params.arguments.push_back(get_arg(p));
}
return new custom_gpu_primitive_gpu(arg, cl_kernel);
}
} // namespace neural
namespace cldnn { namespace gpu { namespace detail {
attach_custom_gpu_primitive_gpu::attach_custom_gpu_primitive_gpu() {
implementation_map<custom_gpu_primitive>::add({{cldnn::engine_types::ocl, neural::create}});
return new custom_gpu_primitive_impl(arg, cl_kernel);
}
} } } // namespace cldnn::gpu::detail
namespace detail {
attach_custom_gpu_primitive_impl::attach_custom_gpu_primitive_impl() {
implementation_map<custom_gpu_primitive>::add(cldnn::impl_types::ocl, create, {});
}
} // namespace detail
} // namespace ocl
} // namespace cldnn

View File

@ -0,0 +1,148 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "deconvolution_inst.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "deconvolution/deconvolution_kernel_selector.h"
#include "deconvolution/deconvolution_kernel_base.h"
#include <algorithm>
namespace cldnn {
namespace ocl {
struct deconvolution_impl : typed_primitive_impl_ocl<deconvolution> {
using parent = typed_primitive_impl_ocl<deconvolution>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<deconvolution_impl>(*this);
}
protected:
// TODO: share it with convolution and fully connected
bool validate_impl(const typed_primitive_inst<deconvolution>&) const override {
bool res = true;
CLDNN_ERROR_NOT_EQUAL(_outer.id(),
"deconvolution filling value",
_outer.get_output_layout().data_padding.filling_value(),
"padding mode",
0.0f,
"Unknown padding mode in deconvolution.");
return res;
}
kernel_arguments_data get_arguments(typed_primitive_inst<deconvolution>& instance, int32_t split) const override {
kernel_arguments_data args = parent::get_arguments(instance, split);
args.weights = instance.weights_memory(split);
args.bias = instance.bias_term() ? instance.bias_memory(split) : nullptr;
return args;
}
int32_t get_split() const override { return _outer.get_split(); }
uint32_t get_groups() const override { return _outer.get_groups(); }
public:
static primitive_impl* create(const deconvolution_node& arg) {
const auto& primitive = arg.get_primitive();
const auto& weights_layout = arg.weights(0).get_output_layout();
const auto& weights_size = weights_layout.size;
const auto& split = primitive->split();
const auto& stride = primitive->stride;
#if 0 // TODO: support dilation
const auto& dilation = primitive->dilation;
#else
const tensor dilation = {0, 0, 1, 1, 1};
#endif
const auto actual_split = split;
const auto& input_offset = primitive->input_offset;
const auto& groups = primitive->groups;
auto deconv_params = get_weights_bias_default_params<kernel_selector::deconvolution_params>(
arg,
(groups > 1) ? 1 : actual_split,
1,
primitive->grouped_weights_shape);
auto deconv_optional_params =
get_default_weights_bias_optional_params<kernel_selector::deconvolution_optional_params>(arg.get_program());
deconv_params.split = split;
deconv_params.groups = groups;
auto spatial_size = arg.get_output_layout().format.dimension() - 2;
uint32_t kx = weights_size.spatial[0];
uint32_t ky = weights_size.spatial[1];
uint32_t kz = spatial_size == 2 ? 1 : weights_size.spatial[2];
deconv_params.filterSize = { kx, ky, kz };
deconv_params.padding = {(uint32_t)std::max(-input_offset.spatial[0], 0),
(uint32_t)std::max(-input_offset.spatial[1], 0),
(uint32_t)std::max(-input_offset.spatial[2], 0)};
deconv_params.stride = {(uint32_t)stride.spatial[0], (uint32_t)stride.spatial[1], (uint32_t)stride.spatial[2]};
deconv_params.dilation = {(uint32_t)dilation.spatial[0],
(uint32_t)dilation.spatial[1],
(uint32_t)dilation.spatial[2]};
auto& kernel_selector = kernel_selector::deconvolution_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(deconv_params, deconv_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with these arguments");
auto deconv = new deconvolution_impl(arg, best_kernels[0]);
return deconv;
}
};
namespace detail {
attach_deconvolution_impl::attach_deconvolution_impl() {
implementation_map<deconvolution>::add(impl_types::ocl, deconvolution_impl::create, {
std::make_tuple(data_types::f32, format::yxfb),
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f32, format::bfzyx),
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::f16, format::yxfb),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::f16, format::bfzyx),
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
std::make_tuple(data_types::f32, format::byxf),
std::make_tuple(data_types::f16, format::byxf),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::u8, format::bfyx),
std::make_tuple(data_types::i8, format::bfzyx),
std::make_tuple(data_types::u8, format::bfzyx),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::u8, format::bs_fs_zyx_bsv16_fsv16),
});
}
} // namespace detail
} // namespace ocl
} // namespace cldnn
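The padding passed to the deconvolution kernel selector above is derived from the primitive's input_offset, which is stored as a per-axis value that is usually non-positive, hence the std::max(-offset, 0). A minimal sketch of that derivation with made-up values:

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <iostream>

int main() {
    // Hypothetical per-axis input offsets (x, y, z); values here are made up.
    std::array<int, 3> input_offset = {-1, -2, 0};
    std::array<uint32_t, 3> padding{};
    for (std::size_t i = 0; i < padding.size(); ++i)
        padding[i] = static_cast<uint32_t>(std::max(-input_offset[i], 0));
    std::cout << padding[0] << " " << padding[1] << " " << padding[2] << "\n";  // 1 2 0
}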

View File

@ -3,8 +3,8 @@
//
#include "deformable_convolution_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "kernel_runner.h"
@ -13,14 +13,14 @@
#include <algorithm>
namespace cldnn {
namespace gpu {
namespace ocl {
struct deformable_conv_gpu : typed_primitive_gpu_impl<deformable_conv> {
using parent = typed_primitive_gpu_impl<deformable_conv>;
struct deformable_conv_impl : typed_primitive_impl_ocl<deformable_conv> {
using parent = typed_primitive_impl_ocl<deformable_conv>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<deformable_conv_gpu>(*this);
return make_unique<deformable_conv_impl>(*this);
}
protected:
@ -71,18 +71,18 @@ public:
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with these arguments");
auto conv = new deformable_conv_gpu(arg, best_kernels[0]);
auto conv = new deformable_conv_impl(arg, best_kernels[0]);
return conv;
}
};
struct deformable_interp_gpu : typed_primitive_gpu_impl<deformable_interp> {
using parent = typed_primitive_gpu_impl<deformable_interp>;
struct deformable_interp_impl : typed_primitive_impl_ocl<deformable_interp> {
using parent = typed_primitive_impl_ocl<deformable_interp>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<deformable_interp_gpu>(*this);
return make_unique<deformable_interp_impl>(*this);
}
protected:
@ -139,7 +139,7 @@ public:
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with these arguments");
auto conv = new deformable_interp_gpu(arg, best_kernels[0]);
auto conv = new deformable_interp_impl(arg, best_kernels[0]);
return conv;
}
@ -147,20 +147,20 @@ public:
namespace detail {
attach_deformable_conv_gpu::attach_deformable_conv_gpu() {
implementation_map<deformable_conv>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
deformable_conv_gpu::create);
implementation_map<deformable_conv>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
deformable_conv_gpu::create);
attach_deformable_conv_impl::attach_deformable_conv_impl() {
implementation_map<deformable_conv>::add(impl_types::ocl, deformable_conv_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
});
}
attach_deformable_interp_gpu::attach_deformable_interp_gpu() {
implementation_map<deformable_interp>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
deformable_interp_gpu::create);
implementation_map<deformable_interp>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
deformable_interp_gpu::create);
attach_deformable_interp_impl::attach_deformable_interp_impl() {
implementation_map<deformable_interp>::add(impl_types::ocl, deformable_interp_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn

View File

@ -0,0 +1,71 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "depth_to_space_inst.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "depth_to_space/depth_to_space_kernel_selector.h"
#include "depth_to_space/depth_to_space_kernel_ref.h"
#include "cldnn/runtime/error_handler.hpp"
#include "common_types.h"
using namespace cldnn;
namespace cldnn {
namespace ocl {
struct depth_to_space_impl : typed_primitive_impl_ocl<depth_to_space> {
using parent = typed_primitive_impl_ocl<depth_to_space>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<depth_to_space_impl>(*this);
}
public:
static primitive_impl* create(const depth_to_space_node& arg) {
auto depth_to_space_params = get_default_params<kernel_selector::depth_to_space_params>(arg);
auto depth_to_space_optional_params =
get_default_optional_params<kernel_selector::depth_to_space_optional_params>(arg.get_program());
depth_to_space_params.block_size = arg.get_primitive()->block_size;
depth_to_space_params.mode = arg.get_primitive()->mode == depth_to_space_mode::blocks_first ? kernel_selector::depth_to_space_mode::BLOCKS_FIRST
: kernel_selector::depth_to_space_mode::DEPTH_FIRST;
auto& kernel_selector = kernel_selector::depth_to_space_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(depth_to_space_params, depth_to_space_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto depth_to_space = new depth_to_space_impl(arg, best_kernels[0]);
return depth_to_space;
}
};
namespace detail {
attach_depth_to_space_impl::attach_depth_to_space_impl() {
implementation_map<depth_to_space>::add(impl_types::ocl, depth_to_space_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::u8, format::bfyx),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::f32, format::bfzyx),
std::make_tuple(data_types::f16, format::bfzyx),
std::make_tuple(data_types::u8, format::bfzyx),
std::make_tuple(data_types::i8, format::bfzyx),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
});
}
} // namespace detail
} // namespace ocl
} // namespace cldnn
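For orientation, depth_to_space with block_size bs rearranges a [C*bs*bs, H, W] tensor into [C, H*bs, W*bs]. The sketch below assumes blocks_first means the block indices vary slowest within the channel dimension (DCR-style ordering); it is an illustration, not the GPU kernel.

#include <cstddef>
#include <iostream>
#include <vector>

// Rearranges a row-major [C*bs*bs, H, W] tensor (blocks_first) into [C, H*bs, W*bs].
std::vector<float> depth_to_space(const std::vector<float>& in,
                                  std::size_t C, std::size_t H, std::size_t W, std::size_t bs) {
    std::vector<float> out(C * H * bs * W * bs);
    for (std::size_t by = 0; by < bs; ++by)
        for (std::size_t bx = 0; bx < bs; ++bx)
            for (std::size_t c = 0; c < C; ++c)
                for (std::size_t y = 0; y < H; ++y)
                    for (std::size_t x = 0; x < W; ++x) {
                        std::size_t in_c = (by * bs + bx) * C + c;  // block index varies slowest
                        std::size_t src = (in_c * H + y) * W + x;
                        std::size_t dst = (c * H * bs + y * bs + by) * W * bs + x * bs + bx;
                        out[dst] = in[src];
                    }
    return out;
}

int main() {
    std::vector<float> in = {1, 2, 3, 4};           // 4 channels of 1x1
    for (float v : depth_to_space(in, 1, 1, 1, 2))  // -> 1 channel of 2x2
        std::cout << v << " ";                      // 1 2 3 4
}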

View File

@ -0,0 +1,188 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "eltwise_inst.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "eltwise/eltwise_kernel_selector.h"
#include "eltwise/eltwise_kernel_base.h"
#include <vector>
namespace cldnn {
namespace ocl {
struct eltwise_impl : typed_primitive_impl_ocl<eltwise> {
using parent = typed_primitive_impl_ocl<eltwise>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<eltwise_impl>(*this);
}
protected:
kernel_arguments_data get_arguments(typed_primitive_inst<eltwise>& instance, int32_t split) const override {
kernel_arguments_data args = parent::get_arguments(instance, split);
return args;
}
public:
static primitive_impl* create(const eltwise_node& arg) {
auto ew_params = get_default_params<kernel_selector::eltwise_params>(arg);
auto ew_optional_params =
get_default_optional_params<kernel_selector::eltwise_optional_params>(arg.get_program());
for (size_t i = 1; i < arg.inputs_count(); i++) {
ew_params.inputs.push_back(convert_data_tensor(arg.input(i).get_output_layout()));
}
const auto& primitive = arg.get_primitive();
ew_params.operations.push_back({{kernel_selector::eltwise_params::InputType::Buffer(0),
kernel_selector::eltwise_params::InputType::Buffer(1)},
convert_to_eltwise_mode(primitive->mode)});
for (uint32_t i = 2; i < static_cast<uint32_t>(arg.inputs_count()); i++) {
ew_params.operations.push_back({{kernel_selector::eltwise_params::InputType::Intermediate(i - 2),
kernel_selector::eltwise_params::InputType::Buffer(i)},
convert_to_eltwise_mode(primitive->mode)});
}
if (primitive->mode == eltwise_mode::sum) {
ew_params.coefficients = primitive->coefficients;
}
for (size_t i = 0; i < ew_params.inputs.size(); i++) {
if (!ew_params.inputs[i].SameDims(ew_params.output)) {
std::vector<int32_t> input_size = arg.input(i).get_output_layout().size.raw.vector();
std::vector<int32_t> output_size = arg.get_output_layout().size.raw.vector();
bool broadcast = false;
for (size_t d = 0; d < output_size.size(); d++) {
if (output_size[d] != 1 && input_size[d] == 1)
broadcast = true;
}
if (broadcast) {
ew_params.broadcast = true;
break;
} else {
ew_params.layoutBased = true;
break;
}
}
}
// stride
if (!primitive->stride.empty()) {
const auto& stride = primitive->stride;
ew_params.stride.resize(stride.size());
for (size_t i = 0; i < primitive->stride.size(); i++) {
ew_params.stride[i] = {(uint32_t)stride[i].spatial[0],
(uint32_t)stride[i].spatial[1],
(uint32_t)stride[i].spatial[2]};
}
}
// check if strides are the same
if (!ew_params.stride.empty()) {
const auto& stride = ew_params.stride[0];
for (size_t i = 1; i < ew_params.stride.size(); i++) {
if (stride.x != ew_params.stride[i].x || stride.y != ew_params.stride[i].y)
ew_params.layoutBased = true;
}
} else if (!ew_params.inputs[0].SameDimsSizes(ew_params.inputs[1])) {
ew_params.broadcast = true;
}
// TODO [LOW PRECISION]: check if this parameter's really needed. Maybe data types are enough
bool quantization = true;
for (size_t i = 0; i < arg.inputs_count(); i++) {
if (arg.input(i).get_output_layout().data_type != data_types::u8 &&
arg.input(i).get_output_layout().data_type != data_types::i8) {
quantization = false;
}
}
ew_params.int8_quantization = quantization;
auto& kernel_selector = kernel_selector::eltwise_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(ew_params, ew_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto eltwise = new eltwise_impl(arg, best_kernels[0]);
return eltwise;
}
};
namespace detail {
attach_eltwise_impl::attach_eltwise_impl() {
implementation_map<eltwise>::add(impl_types::ocl, eltwise_impl::create, {
std::make_tuple(data_types::f32, format::yxfb),
std::make_tuple(data_types::u8, format::bfyx),
std::make_tuple(data_types::f16, format::yxfb),
std::make_tuple(data_types::i8, format::yxfb),
std::make_tuple(data_types::i32, format::yxfb),
std::make_tuple(data_types::i64, format::yxfb),
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::i32, format::bfyx),
std::make_tuple(data_types::i64, format::bfyx),
std::make_tuple(data_types::f32, format::byxf),
std::make_tuple(data_types::f16, format::byxf),
std::make_tuple(data_types::i8, format::byxf),
std::make_tuple(data_types::i32, format::byxf),
std::make_tuple(data_types::i64, format::byxf),
std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::f32, format::bfzyx),
std::make_tuple(data_types::f16, format::bfzyx),
std::make_tuple(data_types::i8, format::bfzyx),
std::make_tuple(data_types::u8, format::bfzyx),
std::make_tuple(data_types::i32, format::bfzyx),
std::make_tuple(data_types::i64, format::bfzyx),
std::make_tuple(data_types::f32, format::bfwzyx),
std::make_tuple(data_types::f16, format::bfwzyx),
std::make_tuple(data_types::i8, format::bfwzyx),
std::make_tuple(data_types::u8, format::bfwzyx),
std::make_tuple(data_types::i32, format::bfwzyx),
std::make_tuple(data_types::i64, format::bfwzyx),
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::i32, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::i64, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::i32, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::i64, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv4),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv32),
std::make_tuple(data_types::f16, format::b_fs_yx_fsv32),
std::make_tuple(data_types::i8, format::b_fs_zyx_fsv32),
std::make_tuple(data_types::u8, format::b_fs_zyx_fsv32),
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv32),
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv32),
std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),
});
}
} // namespace detail
} // namespace ocl
} // namespace cldnn
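The broadcast/layoutBased decision in eltwise_impl::create above boils down to: an input needs broadcasting when some output dimension is greater than 1 while the matching input dimension is 1; any other shape mismatch is treated as a layout difference. A sketch of that check, assuming inputs and output have the same rank:

#include <cstddef>
#include <iostream>
#include <vector>

// True when some output dimension is > 1 while the matching input dimension is 1
// (input is assumed to already have the same rank as the output).
bool needs_broadcast(const std::vector<int>& input_dims, const std::vector<int>& output_dims) {
    for (std::size_t d = 0; d < output_dims.size(); ++d)
        if (output_dims[d] != 1 && input_dims[d] == 1)
            return true;
    return false;
}

int main() {
    std::cout << needs_broadcast({1, 16, 1, 1}, {8, 16, 32, 32}) << "\n";    // 1: broadcast
    std::cout << needs_broadcast({8, 16, 32, 32}, {8, 16, 32, 32}) << "\n";  // 0: same dims
}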

View File

@ -3,8 +3,8 @@
//
#include "embedding_bag_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "embedding_bag/embedding_bag_kernel_selector.h"
#include "embedding_bag/embedding_bag_kernel_ref.h"
@ -14,13 +14,13 @@
using namespace cldnn;
namespace cldnn {
namespace gpu {
struct embedding_bag_gpu : typed_primitive_gpu_impl<embedding_bag> {
using parent = typed_primitive_gpu_impl<embedding_bag>;
namespace ocl {
struct embedding_bag_impl : typed_primitive_impl_ocl<embedding_bag> {
using parent = typed_primitive_impl_ocl<embedding_bag>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<embedding_bag_gpu>(*this);
return make_unique<embedding_bag_impl>(*this);
}
public:
@ -58,7 +58,7 @@ public:
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto embedding_bag = new embedding_bag_gpu(arg, best_kernels[0]);
auto embedding_bag = new embedding_bag_impl(arg, best_kernels[0]);
return embedding_bag;
}
@ -66,12 +66,13 @@ public:
namespace detail {
attach_embedding_bag_gpu::attach_embedding_bag_gpu() {
auto val_fw = embedding_bag_gpu::create;
implementation_map<embedding_bag>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
implementation_map<embedding_bag>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
attach_embedding_bag_impl::attach_embedding_bag_impl() {
implementation_map<embedding_bag>::add(impl_types::ocl, embedding_bag_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn

View File

@ -3,8 +3,8 @@
//
#include "extract_image_patches_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
@ -12,14 +12,14 @@
#include "extract_image_patches/extract_image_patches_kernel_ref.h"
namespace cldnn {
namespace gpu {
namespace ocl {
struct extract_image_patches_gpu : typed_primitive_gpu_impl<extract_image_patches> {
using parent = typed_primitive_gpu_impl<extract_image_patches>;
struct extract_image_patches_impl : typed_primitive_impl_ocl<extract_image_patches> {
using parent = typed_primitive_impl_ocl<extract_image_patches>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<extract_image_patches_gpu>(*this);
return make_unique<extract_image_patches_impl>(*this);
}
public:
@ -41,7 +41,7 @@ public:
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto extract_image_patches = new extract_image_patches_gpu(arg, best_kernels[0]);
auto extract_image_patches = new extract_image_patches_impl(arg, best_kernels[0]);
return extract_image_patches;
}
@ -49,16 +49,17 @@ public:
namespace detail {
attach_extract_image_patches_gpu::attach_extract_image_patches_gpu() {
implementation_map<extract_image_patches>::add(
{{std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), extract_image_patches_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), extract_image_patches_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), extract_image_patches_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), extract_image_patches_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), extract_image_patches_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), extract_image_patches_gpu::create}});
attach_extract_image_patches_impl::attach_extract_image_patches_impl() {
implementation_map<extract_image_patches>::add(impl_types::ocl, extract_image_patches_impl::create, {
std::make_tuple(data_types::i32, format::bfyx),
std::make_tuple(data_types::i64, format::bfyx),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::u8, format::bfyx),
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn

View File

@ -5,8 +5,8 @@
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "fully_connected_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "fully_connected/fully_connected_kernel_selector.h"
#include "fully_connected/fully_connected_params.h"
@ -20,14 +20,14 @@
#include <memory>
namespace cldnn {
namespace gpu {
namespace ocl {
struct fully_connected_gpu : typed_primitive_gpu_impl<fully_connected> {
using parent = typed_primitive_gpu_impl<fully_connected>;
struct fully_connected_impl : typed_primitive_impl_ocl<fully_connected> {
using parent = typed_primitive_impl_ocl<fully_connected>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<fully_connected_gpu>(*this);
return make_unique<fully_connected_impl>(*this);
}
protected:
@ -71,7 +71,7 @@ public:
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto fc = new fully_connected_gpu(arg, best_kernels[0]);
auto fc = new fully_connected_impl(arg, best_kernels[0]);
return fc;
}
@ -79,34 +79,29 @@ public:
namespace detail {
attach_fully_connected_gpu::attach_fully_connected_gpu() {
auto val_fw = fully_connected_gpu::create;
implementation_map<fully_connected>::add({
{std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw},
{std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw},
{std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
{std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
{std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw},
{std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw},
{std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw},
{std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw},
// MMAD
{std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv32), val_fw},
{std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv32), val_fw},
// IMAD
{std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), val_fw},
{std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), val_fw},
{std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), val_fw},
{std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw},
{std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw},
{std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16), val_fw},
{std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_yx_bsv16_fsv16), val_fw},
// fs_b_yx_fsv32
{std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), val_fw},
attach_fully_connected_impl::attach_fully_connected_impl() {
implementation_map<fully_connected>::add(impl_types::ocl, fully_connected_impl::create, {
std::make_tuple(data_types::f32, format::yxfb),
std::make_tuple(data_types::f16, format::yxfb),
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::f32, format::byxf),
std::make_tuple(data_types::f16, format::byxf),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::u8, format::bfyx),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv4),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn

View File

@ -3,8 +3,8 @@
//
#include "fused_conv_eltwise_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "kernel_runner.h"
@ -14,14 +14,14 @@
#include <memory>
namespace cldnn {
namespace gpu {
namespace ocl {
struct fused_conv_eltwise_gpu : typed_primitive_gpu_impl<fused_conv_eltwise> {
using parent = typed_primitive_gpu_impl<fused_conv_eltwise>;
struct fused_conv_eltwise_impl : typed_primitive_impl_ocl<fused_conv_eltwise> {
using parent = typed_primitive_impl_ocl<fused_conv_eltwise>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<fused_conv_eltwise_gpu>(*this);
return make_unique<fused_conv_eltwise_impl>(*this);
}
protected:
@ -139,7 +139,7 @@ public:
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto conv = new fused_conv_eltwise_gpu(arg, best_kernels[0]);
auto conv = new fused_conv_eltwise_impl(arg, best_kernels[0]);
return conv;
}
@ -147,42 +147,27 @@ public:
namespace detail {
attach_fused_conv_eltwise_gpu::attach_fused_conv_eltwise_gpu() {
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
fused_conv_eltwise_gpu::create);
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
fused_conv_eltwise_gpu::create);
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
fused_conv_eltwise_gpu::create);
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
fused_conv_eltwise_gpu::create);
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx),
fused_conv_eltwise_gpu::create);
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx),
fused_conv_eltwise_gpu::create);
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx),
fused_conv_eltwise_gpu::create);
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx),
fused_conv_eltwise_gpu::create);
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx),
fused_conv_eltwise_gpu::create);
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16),
fused_conv_eltwise_gpu::create);
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16),
fused_conv_eltwise_gpu::create);
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
fused_conv_eltwise_gpu::create);
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
fused_conv_eltwise_gpu::create);
// IMAD
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4),
fused_conv_eltwise_gpu::create);
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4),
fused_conv_eltwise_gpu::create);
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::image_2d_rgba),
fused_conv_eltwise_gpu::create);
attach_fused_conv_eltwise_impl::attach_fused_conv_eltwise_impl() {
implementation_map<fused_conv_eltwise>::add(impl_types::ocl, fused_conv_eltwise_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::yxfb),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::u8, format::bfyx),
std::make_tuple(data_types::f32, format::bfzyx),
std::make_tuple(data_types::f16, format::bfzyx),
std::make_tuple(data_types::i8, format::bfzyx),
std::make_tuple(data_types::u8, format::bfzyx),
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
std::make_tuple(data_types::u8, format::image_2d_rgba),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn


@ -3,8 +3,8 @@
//
#include "gather_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "gather/gather_kernel_selector.h"
#include "gather/gather_kernel_ref.h"
@ -13,7 +13,7 @@
using namespace cldnn;
namespace cldnn {
namespace gpu {
namespace ocl {
kernel_selector::gather_axis convert_axis(gather::gather_axis axis) {
switch (axis) {
case gather::along_x:
@ -33,12 +33,12 @@ kernel_selector::gather_axis convert_axis(gather::gather_axis axis) {
}
}
struct gather_gpu : typed_primitive_gpu_impl<gather> {
using parent = typed_primitive_gpu_impl<gather>;
struct gather_impl : typed_primitive_impl_ocl<gather> {
using parent = typed_primitive_impl_ocl<gather>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<gather_gpu>(*this);
return make_unique<gather_impl>(*this);
}
public:
@ -61,7 +61,7 @@ public:
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto gather = new gather_gpu(arg, best_kernels[0]);
auto gather = new gather_impl(arg, best_kernels[0]);
return gather;
}
@ -69,21 +69,20 @@ public:
namespace detail {
attach_gather_gpu::attach_gather_gpu() {
auto val_fw = gather_gpu::create;
implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw);
attach_gather_impl::attach_gather_impl() {
implementation_map<gather>::add(impl_types::ocl, gather_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::i32, format::bfyx),
std::make_tuple(data_types::f32, format::bfzyx),
std::make_tuple(data_types::f16, format::bfzyx),
std::make_tuple(data_types::i32, format::bfzyx),
std::make_tuple(data_types::f32, format::bfwzyx),
std::make_tuple(data_types::f16, format::bfwzyx),
std::make_tuple(data_types::i32, format::bfwzyx),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn


@ -0,0 +1,67 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "gather_nd_inst.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "gather/gather_nd_kernel_selector.h"
#include "gather/gather_nd_kernel_ref.h"
using namespace cldnn;
namespace cldnn {
namespace ocl {
struct gather_nd_impl : typed_primitive_impl_ocl<gather_nd> {
using parent = typed_primitive_impl_ocl<gather_nd>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<gather_nd_impl>(*this);
}
static primitive_impl* create(const gather_nd_node& arg) {
auto gather_nd_params = get_default_params<kernel_selector::gather_nd_params>(arg);
auto gather_nd_optional_params =
get_default_optional_params<kernel_selector::gather_nd_optional_params>(arg.get_program());
gather_nd_params.indices_rank = arg.get_primitive()->indices_rank;
gather_nd_params.batch_dims = arg.get_primitive()->batch_dims;
gather_nd_params.inputs.push_back(convert_data_tensor(arg.input(1).get_output_layout()));
auto& kernel_selector = kernel_selector::gather_nd_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(gather_nd_params, gather_nd_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto gather_nd = new gather_nd_impl(arg, best_kernels[0]);
return gather_nd;
}
};
namespace detail {
attach_gather_nd_impl::attach_gather_nd_impl() {
implementation_map<gather_nd>::add(impl_types::ocl, gather_nd_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::i32, format::bfyx),
std::make_tuple(data_types::f32, format::bfzyx),
std::make_tuple(data_types::f16, format::bfzyx),
std::make_tuple(data_types::i32, format::bfzyx),
std::make_tuple(data_types::f32, format::bfwzyx),
std::make_tuple(data_types::f16, format::bfwzyx),
std::make_tuple(data_types::i32, format::bfwzyx),
});
}
} // namespace detail
} // namespace ocl
} // namespace cldnn

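The create() function in the new gather_nd file above follows the kernel-selection flow that recurs in the implementation files below (gather_tree, gemm, grn, lrn, mvn, normalize, one_hot, quantize and others): build kernel-selector parameters from the node, ask the primitive's kernel selector for the best kernels, report an error if none match, and wrap the first result in the OCL implementation. A condensed sketch of that flow under hypothetical names (my_prim_node, my_prim_impl and the kernel_selector::my_prim_* types are placeholders, not part of this commit):

    // Hypothetical sketch of the common create() body used by the OCL impls in this commit.
    static primitive_impl* create(const my_prim_node& arg) {
        // translate node and program state into kernel-selector parameters
        auto params = get_default_params<kernel_selector::my_prim_params>(arg);
        auto optional_params =
            get_default_optional_params<kernel_selector::my_prim_optional_params>(arg.get_program());

        // query the primitive's kernel selector for kernels matching the parameters
        auto& kernel_selector = kernel_selector::my_prim_kernel_selector::Instance();
        auto best_kernels = kernel_selector.GetBestKernels(params, optional_params);

        // the real implementations report failure through CLDNN_ERROR_BOOL
        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         best_kernels.empty(),
                         "Cannot find a proper kernel with these arguments");

        // construct the OCL implementation from the node and the selected kernel data
        return new my_prim_impl(arg, best_kernels[0]);
    }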

@ -4,22 +4,22 @@
#include "gather_tree_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "gather_tree/gather_tree_kernel_selector.h"
#include "gather_tree/gather_tree_kernel_base.h"
#include "cldnn/runtime/error_handler.hpp"
namespace cldnn {
namespace gpu {
namespace ocl {
struct gather_tree_gpu : typed_primitive_gpu_impl<gather_tree> {
using parent = typed_primitive_gpu_impl<gather_tree>;
struct gather_tree_impl : typed_primitive_impl_ocl<gather_tree> {
using parent = typed_primitive_impl_ocl<gather_tree>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<gather_tree_gpu>(*this);
return make_unique<gather_tree_impl>(*this);
}
static primitive_impl* create(const gather_tree_node& arg) {
@ -39,22 +39,21 @@ struct gather_tree_gpu : typed_primitive_gpu_impl<gather_tree> {
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
return new gather_tree_gpu(arg, best_kernels[0]);
return new gather_tree_impl(arg, best_kernels[0]);
}
};
namespace detail {
attach_gather_tree_gpu::attach_gather_tree_gpu() {
auto val_fw = gather_tree_gpu::create;
implementation_map<gather_tree>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), val_fw);
implementation_map<gather_tree>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
implementation_map<gather_tree>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), val_fw);
implementation_map<gather_tree>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
implementation_map<gather_tree>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
implementation_map<gather_tree>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
}
attach_gather_tree_impl::attach_gather_tree_impl() {
implementation_map<gather_tree>::add(impl_types::ocl, gather_tree_impl::create, {
std::make_tuple(data_types::i32, format::yxfb),
std::make_tuple(data_types::i32, format::bfyx),
std::make_tuple(data_types::i32, format::byxf),
std::make_tuple(data_types::f32, format::yxfb),
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f32, format::byxf),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn


@ -4,22 +4,22 @@
#include "gemm_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "gemm/gemm_kernel_selector.h"
#include "gemm/gemm_kernel_base.h"
#include "cldnn/runtime/error_handler.hpp"
namespace cldnn {
namespace gpu {
namespace ocl {
struct gemm_gpu : typed_primitive_gpu_impl<gemm> {
using parent = typed_primitive_gpu_impl<gemm>;
struct gemm_impl : typed_primitive_impl_ocl<gemm> {
using parent = typed_primitive_impl_ocl<gemm>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<gemm_gpu>(*this);
return make_unique<gemm_impl>(*this);
}
public:
@ -53,28 +53,29 @@ public:
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
return new gemm_gpu(arg, best_kernels[0]);
return new gemm_impl(arg, best_kernels[0]);
}
};
namespace detail {
attach_gemm_gpu::attach_gemm_gpu() {
auto val_fw = gemm_gpu::create;
implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw);
implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw);
attach_gemm_impl::attach_gemm_impl() {
implementation_map<gemm>::add(impl_types::ocl, gemm_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::u8, format::bfyx),
std::make_tuple(data_types::f32, format::bfzyx),
std::make_tuple(data_types::f16, format::bfzyx),
std::make_tuple(data_types::i8, format::bfzyx),
std::make_tuple(data_types::u8, format::bfzyx),
std::make_tuple(data_types::f32, format::bfwzyx),
std::make_tuple(data_types::f16, format::bfwzyx),
std::make_tuple(data_types::i8, format::bfwzyx),
std::make_tuple(data_types::u8, format::bfwzyx),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn


@ -4,38 +4,37 @@
#include "generic_layer_inst.h"
#include "cldnn/runtime/engine.hpp"
#include "implementation_map.h"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "network_impl.h"
#include "register_gpu.hpp"
#include "register.hpp"
#include <vector>
using namespace cldnn;
namespace cldnn {
namespace ocl {
namespace neural {
struct generic_layer_gpu : typed_primitive_impl<generic_layer> {
struct generic_layer_impl : typed_primitive_impl<generic_layer> {
const generic_layer_node& outer;
const kernel_selector::cl_kernel_data& _cl_kernel_data;
std::vector<kernel::ptr> _kernels;
kernel_id _kernel_id;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<generic_layer_gpu>(*this);
return make_unique<generic_layer_impl>(*this);
}
generic_layer_gpu(const generic_layer_gpu& other)
generic_layer_impl(const generic_layer_impl& other)
: outer(other.outer)
, _cl_kernel_data(other._cl_kernel_data)
, _kernels({})
, _kernel_id(other._kernel_id) {
if (other._kernels.empty()) {
throw std::runtime_error("Can't copy generic_layer_gpu node: kernels vector is empty");
throw std::runtime_error("Can't copy generic_layer_impl node: kernels vector is empty");
}
_kernels.push_back(other._kernels.front()->clone());
}
generic_layer_gpu(const generic_layer_node& arg)
generic_layer_impl(const generic_layer_node& arg)
: outer(arg)
, _cl_kernel_data(*outer.get_primitive()->generic_params.clKernel.get())
, _kernels() {
@ -109,17 +108,17 @@ struct generic_layer_cpu : typed_primitive_impl<generic_layer> {
static primitive_impl* create(const generic_layer_node& arg) {
if (arg.get_primitive()->generic_params.engine == kernel_selector::generic_kernel_params::Engine::GPU) {
return new generic_layer_gpu(arg);
return new generic_layer_impl(arg);
} else {
return new generic_layer_cpu(arg);
}
}
} // namespace neural
namespace detail {
attach_generic_layer_impl::attach_generic_layer_impl() {
implementation_map<generic_layer>::add(cldnn::impl_types::ocl, create, {});
}
namespace cldnn { namespace gpu { namespace detail {
attach_generic_layer_gpu::attach_generic_layer_gpu() {
implementation_map<generic_layer>::add({ {cldnn::engine_types::ocl, neural::create} });
}
} } } // namespace cldnn::gpu::detail
} // namespace detail
} // namespace ocl
} // namespace cldnn


@ -3,8 +3,8 @@
//
#include "grn_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "grn/grn_kernel_selector.h"
@ -15,14 +15,14 @@
using namespace cldnn;
namespace cldnn {
namespace gpu {
namespace ocl {
struct grn_gpu : typed_primitive_gpu_impl<grn> {
using parent = typed_primitive_gpu_impl<grn>;
struct grn_impl : typed_primitive_impl_ocl<grn> {
using parent = typed_primitive_impl_ocl<grn>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<grn_gpu>(*this);
return make_unique<grn_impl>(*this);
}
public:
@ -40,7 +40,7 @@ public:
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto grn = new grn_gpu(arg, best_kernels[0]);
auto grn = new grn_impl(arg, best_kernels[0]);
return grn;
}
@ -48,11 +48,13 @@ public:
namespace detail {
attach_grn_gpu::attach_grn_gpu() {
implementation_map<grn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), grn_gpu::create);
implementation_map<grn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), grn_gpu::create);
attach_grn_impl::attach_grn_impl() {
implementation_map<grn>::add(impl_types::ocl, grn_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn


@ -0,0 +1,82 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "lrn_inst.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "lrn/lrn_kernel_selector.h"
#include "lrn/lrn_kernel_base.h"
namespace cldnn {
namespace ocl {
struct lrn_impl : typed_primitive_impl_ocl<lrn> {
using parent = typed_primitive_impl_ocl<lrn>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<lrn_impl>(*this);
}
static primitive_impl* create(const lrn_node& arg) {
auto lrn_params = get_default_params<kernel_selector::lrn_params>(arg);
auto lrn_optional_params = get_default_optional_params<kernel_selector::lrn_optional_params>(arg.get_program());
const auto& primitive = arg.get_primitive();
lrn_params.alpha = primitive->alpha;
lrn_params.beta = primitive->beta;
lrn_params.k = primitive->k;
lrn_params.localSize = primitive->size;
lrn_params.divMode = kernel_selector::kernel_divider_mode::FIXED;
lrn_params.normMode = primitive->norm_region == lrn_norm_region_within_channel
? kernel_selector::lrn_mode::WITHIN_CHANNEL
: kernel_selector::lrn_mode::ACROSS_CHANNEL;
auto& kernel_selector = kernel_selector::lrn_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(lrn_params, lrn_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto lrn = new lrn_impl(arg, best_kernels[0]);
return lrn;
}
};
namespace detail {
attach_lrn_impl::attach_lrn_impl() {
implementation_map<lrn>::add(impl_types::ocl, lrn_impl::create, {
std::make_tuple(data_types::f32, format::yxfb),
std::make_tuple(data_types::f16, format::yxfb),
std::make_tuple(data_types::u8, format::yxfb),
std::make_tuple(data_types::i8, format::yxfb),
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::u8, format::bfyx),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::f32, format::byxf),
std::make_tuple(data_types::f16, format::byxf),
std::make_tuple(data_types::u8, format::byxf),
std::make_tuple(data_types::i8, format::byxf),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv4),
std::make_tuple(data_types::f16, format::b_fs_yx_fsv4),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
});
}
} // namespace detail
} // namespace ocl
} // namespace cldnn


@ -5,8 +5,8 @@
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "lstm_dynamic_input_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "lstm_dynamic/lstm_dynamic_input_kernel_selector.h"
#include "lstm_dynamic/lstm_dynamic_input_kernel_base.h"
@ -14,14 +14,14 @@
#include "cldnn/runtime/error_handler.hpp"
namespace cldnn {
namespace gpu {
namespace ocl {
struct lstm_dynamic_input_gpu : typed_primitive_gpu_impl<lstm_dynamic_input> {
using parent = typed_primitive_gpu_impl<lstm_dynamic_input>;
struct lstm_dynamic_input_impl : typed_primitive_impl_ocl<lstm_dynamic_input> {
using parent = typed_primitive_impl_ocl<lstm_dynamic_input>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<lstm_dynamic_input_gpu>(*this);
return make_unique<lstm_dynamic_input_impl>(*this);
}
protected:
@ -64,7 +64,7 @@ public:
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto lstm_dynamic = new lstm_dynamic_input_gpu(arg, best_kernels[0]);
auto lstm_dynamic = new lstm_dynamic_input_impl(arg, best_kernels[0]);
return lstm_dynamic;
}
@ -72,15 +72,13 @@ public:
namespace detail {
attach_lstm_dynamic_input_gpu::attach_lstm_dynamic_input_gpu() {
auto val_fw = lstm_dynamic_input_gpu::create;
implementation_map<lstm_dynamic_input>::add({
{std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
{std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
attach_lstm_dynamic_input_impl::attach_lstm_dynamic_input_impl() {
implementation_map<lstm_dynamic_input>::add(impl_types::ocl, lstm_dynamic_input_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn


@ -5,8 +5,8 @@
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "lstm_dynamic_timeloop_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "lstm_dynamic/lstm_dynamic_timeloop_kernel_selector.h"
#include "lstm_dynamic/lstm_dynamic_timeloop_kernel_base.h"
@ -14,14 +14,14 @@
#include "cldnn/runtime/error_handler.hpp"
namespace cldnn {
namespace gpu {
namespace ocl {
struct lstm_dynamic_timeloop_gpu : typed_primitive_gpu_impl<lstm_dynamic_timeloop> {
using parent = typed_primitive_gpu_impl<lstm_dynamic_timeloop>;
struct lstm_dynamic_timeloop_impl : typed_primitive_impl_ocl<lstm_dynamic_timeloop> {
using parent = typed_primitive_impl_ocl<lstm_dynamic_timeloop>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<lstm_dynamic_timeloop_gpu>(*this);
return make_unique<lstm_dynamic_timeloop_impl>(*this);
}
protected:
@ -85,7 +85,7 @@ public:
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto lstm_dynamic = new lstm_dynamic_timeloop_gpu(arg, best_kernels[0]);
auto lstm_dynamic = new lstm_dynamic_timeloop_impl(arg, best_kernels[0]);
return lstm_dynamic;
}
@ -93,15 +93,13 @@ public:
namespace detail {
attach_lstm_dynamic_timeloop_gpu::attach_lstm_dynamic_timeloop_gpu() {
auto val_fw = lstm_dynamic_timeloop_gpu::create;
implementation_map<lstm_dynamic_timeloop>::add({
{std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
{std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
attach_lstm_dynamic_timeloop_impl::attach_lstm_dynamic_timeloop_impl() {
implementation_map<lstm_dynamic_timeloop>::add(impl_types::ocl, lstm_dynamic_timeloop_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn


@ -5,8 +5,8 @@
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "lstm_elt_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "lstm/lstm_elt_kernel_selector.h"
#include "lstm/lstm_elt_kernel_base.h"
@ -14,14 +14,14 @@
#include "cldnn/runtime/error_handler.hpp"
namespace cldnn {
namespace gpu {
namespace ocl {
struct lstm_elt_gpu : typed_primitive_gpu_impl<lstm_elt> {
using parent = typed_primitive_gpu_impl<lstm_elt>;
struct lstm_elt_impl : typed_primitive_impl_ocl<lstm_elt> {
using parent = typed_primitive_impl_ocl<lstm_elt>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<lstm_elt_gpu>(*this);
return make_unique<lstm_elt_impl>(*this);
}
protected:
@ -85,7 +85,7 @@ public:
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto lstm_elt = new lstm_elt_gpu(arg, best_kernels[0]);
auto lstm_elt = new lstm_elt_impl(arg, best_kernels[0]);
return lstm_elt;
}
@ -93,17 +93,15 @@ public:
namespace detail {
attach_lstm_elt_gpu::attach_lstm_elt_gpu() {
auto val_fw = lstm_elt_gpu::create;
implementation_map<lstm_elt>::add({
{std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
{std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
{std::make_tuple(engine_types::ocl, data_types::f32, format::fyxb), val_fw},
{std::make_tuple(engine_types::ocl, data_types::f16, format::fyxb), val_fw},
attach_lstm_elt_impl::attach_lstm_elt_impl() {
implementation_map<lstm_elt>::add(impl_types::ocl, lstm_elt_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::f32, format::fyxb),
std::make_tuple(data_types::f16, format::fyxb),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn


@ -5,8 +5,8 @@
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "lstm_gemm_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "lstm/lstm_gemm_kernel_selector.h"
#include "lstm/lstm_gemm_kernel_base.h"
@ -14,14 +14,14 @@
#include "cldnn/runtime/error_handler.hpp"
namespace cldnn {
namespace gpu {
namespace ocl {
struct lstm_gemm_gpu : typed_primitive_gpu_impl<lstm_gemm> {
using parent = typed_primitive_gpu_impl<lstm_gemm>;
struct lstm_gemm_impl : typed_primitive_impl_ocl<lstm_gemm> {
using parent = typed_primitive_impl_ocl<lstm_gemm>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<lstm_gemm_gpu>(*this);
return make_unique<lstm_gemm_impl>(*this);
}
protected:
@ -82,7 +82,7 @@ public:
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto lstm_gemm = new lstm_gemm_gpu(arg, best_kernels[0]);
auto lstm_gemm = new lstm_gemm_impl(arg, best_kernels[0]);
return lstm_gemm;
}
@ -90,17 +90,15 @@ public:
namespace detail {
attach_lstm_gemm_gpu::attach_lstm_gemm_gpu() {
auto val_fw = lstm_gemm_gpu::create;
implementation_map<lstm_gemm>::add({
{std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
{std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
{std::make_tuple(engine_types::ocl, data_types::f32, format::fyxb), val_fw},
{std::make_tuple(engine_types::ocl, data_types::f16, format::fyxb), val_fw},
attach_lstm_gemm_impl::attach_lstm_gemm_impl() {
implementation_map<lstm_gemm>::add(impl_types::ocl, lstm_gemm_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::f32, format::fyxb),
std::make_tuple(data_types::f16, format::fyxb),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn


@ -3,8 +3,8 @@
//
#include "max_unpooling_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "cldnn/runtime/error_handler.hpp"
#include "network_impl.h"
#include "kernel_selector_helper.h"
@ -13,14 +13,14 @@
#include <vector>
namespace cldnn {
namespace gpu {
namespace ocl {
struct max_unpooling_gpu : typed_primitive_gpu_impl<max_unpooling> {
using parent = typed_primitive_gpu_impl<max_unpooling>;
struct max_unpooling_impl : typed_primitive_impl_ocl<max_unpooling> {
using parent = typed_primitive_impl_ocl<max_unpooling>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<max_unpooling_gpu>(*this);
return make_unique<max_unpooling_impl>(*this);
}
protected:
@ -55,7 +55,7 @@ public:
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto max_unpool = new max_unpooling_gpu(arg, best_kernels[0]);
auto max_unpool = new max_unpooling_impl(arg, best_kernels[0]);
return max_unpool;
}
@ -63,27 +63,20 @@ public:
namespace detail {
attach_max_unpooling_gpu::attach_max_unpooling_gpu() {
implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
max_unpooling_gpu::create);
implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
max_unpooling_gpu::create);
implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
max_unpooling_gpu::create);
implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
max_unpooling_gpu::create);
implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
max_unpooling_gpu::create);
implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb),
max_unpooling_gpu::create);
implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
max_unpooling_gpu::create);
implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
max_unpooling_gpu::create);
implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf),
max_unpooling_gpu::create);
attach_max_unpooling_impl::attach_max_unpooling_impl() {
implementation_map<max_unpooling>::add(impl_types::ocl, max_unpooling_impl::create, {
std::make_tuple(data_types::f32, format::yxfb),
std::make_tuple(data_types::f16, format::yxfb),
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::i8, format::yxfb),
std::make_tuple(data_types::f32, format::byxf),
std::make_tuple(data_types::f16, format::byxf),
std::make_tuple(data_types::i8, format::byxf),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn


@ -0,0 +1,32 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "mutable_data_inst.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
namespace cldnn {
namespace ocl {
struct mutable_data_impl : public typed_primitive_impl_ocl<mutable_data> {
using parent = typed_primitive_impl_ocl<mutable_data>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<mutable_data_impl>(*this);
}
public:
static primitive_impl* create(mutable_data_node const& arg) { return new mutable_data_impl(arg, {}); }
};
namespace detail {
attach_mutable_data_impl::attach_mutable_data_impl() {
implementation_map<mutable_data>::add(impl_types::ocl, mutable_data_impl::create, {});
}
} // namespace detail
} // namespace ocl
} // namespace cldnn


@ -0,0 +1,86 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "mvn_inst.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "mvn/mvn_kernel_selector.h"
#include "mvn/mvn_kernel_base.h"
#include <algorithm>
using namespace cldnn;
namespace cldnn {
namespace ocl {
struct mvn_impl : typed_primitive_impl_ocl<mvn> {
using parent = typed_primitive_impl_ocl<mvn>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<mvn_impl>(*this);
}
public:
static primitive_impl* create(const mvn_node& arg) {
auto mvn_params = get_default_params<kernel_selector::mvn_params>(arg);
auto mvn_optional_params = get_default_optional_params<kernel_selector::mvn_optional_params>(arg.get_program());
mvn_params.mvnMode = arg.get_primitive()->across_channels ? kernel_selector::mvn_mode::ACROSS_CHANNELS
: kernel_selector::mvn_mode::WITHIN_CHANNELS;
mvn_params.mvnNormalizeVariance = arg.get_primitive()->normalize_variance;
mvn_params.epsilon = arg.get_primitive()->epsilon;
mvn_params.mvnEpsMode = arg.get_primitive()->eps_inside_sqrt ? kernel_selector::mvn_eps_mode::INSIDE_SQRT
: kernel_selector::mvn_eps_mode::OUTSIDE_SQRT;
auto& kernel_selector = kernel_selector::mvn_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(mvn_params, mvn_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto mvn = new mvn_impl(arg, best_kernels[0]);
return mvn;
}
};
namespace detail {
attach_mvn_impl::attach_mvn_impl() {
implementation_map<mvn>::add(impl_types::ocl, mvn_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::u8, format::bfyx),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::f32, format::yxfb),
std::make_tuple(data_types::f16, format::yxfb),
std::make_tuple(data_types::f32, format::byxf),
std::make_tuple(data_types::f16, format::byxf),
std::make_tuple(data_types::f32, format::bfzyx),
std::make_tuple(data_types::f16, format::bfzyx),
std::make_tuple(data_types::u8, format::bfzyx),
std::make_tuple(data_types::i8, format::bfzyx),
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
});
}
} // namespace detail
} // namespace ocl
} // namespace cldnn


@ -0,0 +1,83 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "normalize_inst.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "normalize/normalize_kernel_selector.h"
#include "normalize/normalize_kernel_base.h"
#include <algorithm>
using namespace cldnn;
namespace cldnn {
namespace ocl {
struct normalize_impl : typed_primitive_impl_ocl<normalize> {
using parent = typed_primitive_impl_ocl<normalize>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<normalize_impl>(*this);
}
protected:
kernel_arguments_data get_arguments(typed_primitive_inst<normalize>& instance, int32_t split) const override {
kernel_arguments_data args = parent::get_arguments(instance, split);
args.scale_table = instance.scale_memory();
return args;
}
public:
static primitive_impl* create(const normalize_node& arg) {
auto norm_params = get_default_params<kernel_selector::normalize_params>(arg);
auto norm_optional_params =
get_default_optional_params<kernel_selector::normalize_optional_params>(arg.get_program());
const auto& scale_layout = arg.scale().get_output_layout();
norm_params.normMode = arg.get_primitive()->across_spatial ? kernel_selector::normalize_mode::ACROSS_SPATIAL
: kernel_selector::normalize_mode::WITHIN_SPATIAL;
norm_params.epsilon = arg.get_primitive()->epsilon;
norm_params.scaleTable = convert_data_tensor(scale_layout).FlattenFeatureAndSpatials();
auto& kernel_selector = kernel_selector::normalize_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(norm_params, norm_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto lrn = new normalize_impl(arg, best_kernels[0]);
return lrn;
}
};
namespace detail {
attach_normalize_impl::attach_normalize_impl() {
implementation_map<normalize>::add(impl_types::ocl, normalize_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::u8, format::bfyx),
std::make_tuple(data_types::f32, format::yxfb),
std::make_tuple(data_types::f16, format::yxfb),
std::make_tuple(data_types::i8, format::yxfb),
std::make_tuple(data_types::u8, format::yxfb),
std::make_tuple(data_types::f32, format::byxf),
std::make_tuple(data_types::f16, format::byxf),
std::make_tuple(data_types::i8, format::byxf),
std::make_tuple(data_types::u8, format::byxf),
});
}
} // namespace detail
} // namespace ocl
} // namespace cldnn


@ -0,0 +1,74 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "one_hot_inst.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "one_hot/one_hot_kernel_selector.h"
#include "one_hot/one_hot_kernel_base.h"
#include "cldnn/runtime/error_handler.hpp"
#include <vector>
namespace cldnn {
namespace ocl {
struct one_hot_impl : typed_primitive_impl_ocl<one_hot> {
using parent = typed_primitive_impl_ocl<one_hot>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<one_hot_impl>(*this);
}
static primitive_impl* create(const one_hot_node& arg) {
auto oh_params = get_default_params<kernel_selector::one_hot_params>(arg, 1);
auto oh_optional_params =
get_default_optional_params<kernel_selector::one_hot_optional_params>(arg.get_program());
oh_params.one_hot_axis = arg.get_primitive()->one_hot_axis;
oh_params.on_value = arg.get_primitive()->on_value;
oh_params.off_value = arg.get_primitive()->off_value;
auto output_sizes = arg.get_output_layout().format == format::bfzyx ?
arg.get_output_layout().size.sizes(format::bfzyx) :
arg.get_output_layout().size.sizes(format::bfyx);
oh_params.one_hot_limit = output_sizes[oh_params.one_hot_axis];
auto& kernel_selector = kernel_selector::one_hot_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(oh_params, oh_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with these arguments");
return new one_hot_impl(arg, best_kernels[0]);
}
};
namespace detail {
attach_one_hot_impl::attach_one_hot_impl() {
implementation_map<one_hot>::add(impl_types::ocl, one_hot_impl::create, {
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::u8, format::bfyx),
std::make_tuple(data_types::i32, format::bfyx),
std::make_tuple(data_types::i64, format::bfyx),
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::i8, format::bfzyx),
std::make_tuple(data_types::u8, format::bfzyx),
std::make_tuple(data_types::i32, format::bfzyx),
std::make_tuple(data_types::i64, format::bfzyx),
std::make_tuple(data_types::f32, format::bfzyx),
std::make_tuple(data_types::f16, format::bfzyx),
});
}
} // namespace detail
} // namespace ocl
} // namespace cldnn


@ -3,8 +3,8 @@
//
#include "permute_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "permute/permute_kernel_selector.h"
@ -13,14 +13,14 @@
using namespace cldnn;
namespace cldnn {
namespace gpu {
namespace ocl {
struct permute_gpu : typed_primitive_gpu_impl<permute> {
using parent = typed_primitive_gpu_impl<permute>;
struct permute_impl : typed_primitive_impl_ocl<permute> {
using parent = typed_primitive_impl_ocl<permute>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<permute_gpu>(*this);
return make_unique<permute_impl>(*this);
}
static primitive_impl* create(const permute_node& arg) {
@ -38,7 +38,7 @@ struct permute_gpu : typed_primitive_gpu_impl<permute> {
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto permute = new permute_gpu(arg, best_kernels[0]);
auto permute = new permute_impl(arg, best_kernels[0]);
return permute;
}
@ -46,12 +46,10 @@ struct permute_gpu : typed_primitive_gpu_impl<permute> {
namespace detail {
attach_permute_gpu::attach_permute_gpu() {
implementation_map<permute>::add({
{engine_types::ocl, permute_gpu::create},
});
attach_permute_impl::attach_permute_impl() {
implementation_map<permute>::add(impl_types::ocl, permute_impl::create, {});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn


@ -3,8 +3,8 @@
//
#include "pooling_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "pooling/pooling_kernel_selector.h"
@ -12,7 +12,7 @@
#include <algorithm>
namespace cldnn {
namespace gpu {
namespace ocl {
namespace {
void validate_args(const pooling_node& arg) {
@ -64,12 +64,12 @@ kernel_selector::kernel_divider_mode cldnn_2_kernel_divider_mode(pooling_mode mo
}
} // namespace
struct pooling_gpu : typed_primitive_gpu_impl<pooling> {
using parent = typed_primitive_gpu_impl<pooling>;
struct pooling_impl : typed_primitive_impl_ocl<pooling> {
using parent = typed_primitive_impl_ocl<pooling>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<pooling_gpu>(*this);
return make_unique<pooling_impl>(*this);
}
protected:
@ -148,7 +148,7 @@ public:
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto pool = new pooling_gpu(arg, best_kernels[0]);
auto pool = new pooling_impl(arg, best_kernels[0]);
return pool;
}
@ -156,66 +156,57 @@ public:
namespace detail {
attach_pooling_gpu::attach_pooling_gpu() {
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_yx_bsv16_fsv16), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_zyx_bsv16_fsv16), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv4), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv32), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv32), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv32), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv32), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv32), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv32), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv32), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv32), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::fs_b_yx_fsv32), pooling_gpu::create);
attach_pooling_impl::attach_pooling_impl() {
implementation_map<pooling>::add(impl_types::ocl, pooling_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::u8, format::bfyx),
std::make_tuple(data_types::f32, format::yxfb),
std::make_tuple(data_types::f16, format::yxfb),
std::make_tuple(data_types::i8, format::yxfb),
std::make_tuple(data_types::u8, format::yxfb),
std::make_tuple(data_types::f32, format::byxf),
std::make_tuple(data_types::f16, format::byxf),
std::make_tuple(data_types::i8, format::byxf),
std::make_tuple(data_types::u8, format::byxf),
std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::f32, format::bfzyx),
std::make_tuple(data_types::f16, format::bfzyx),
std::make_tuple(data_types::i8, format::bfzyx),
std::make_tuple(data_types::u8, format::bfzyx),
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::u8, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv4),
std::make_tuple(data_types::f16, format::b_fs_yx_fsv4),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv32),
std::make_tuple(data_types::f16, format::b_fs_yx_fsv32),
std::make_tuple(data_types::i8, format::b_fs_zyx_fsv32),
std::make_tuple(data_types::u8, format::b_fs_zyx_fsv32),
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv32),
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv32),
std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),
std::make_tuple(data_types::f32, format::fs_b_yx_fsv32),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn


@ -2,11 +2,11 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "primitive_gpu_base.h"
#include "primitive_base.hpp"
#include <list>
namespace cldnn {
namespace gpu {
namespace ocl {
bool is_user_cpu(const program_node* user) {
if (user->can_be_optimized()) {
@ -28,5 +28,5 @@ bool is_any_user_cpu(const std::list<const program_node*>& users) {
}
return false;
}
} // namespace gpu
} // namespace cldnn
} // namespace ocl
} // namespace cldnn


@ -11,30 +11,30 @@
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "network_impl.h"
#include "register_gpu.hpp"
#include "register.hpp"
#include <vector>
#include <list>
#include <utility>
namespace cldnn {
namespace gpu {
namespace ocl {
// checks if any user in a list is a cpu primitive
bool is_any_user_cpu(const std::list<const program_node*>& users);
/*
Base class for all GPU implementation of specified primitive type.
For example, all gpu convolution implementations should derive from typed_primitive_gpu_impl<convolution>.
For example, all gpu convolution implementations should derive from typed_primitive_impl_ocl<convolution>.
*/
template <class PType>
struct typed_primitive_gpu_impl : public typed_primitive_impl<PType> {
struct typed_primitive_impl_ocl : public typed_primitive_impl<PType> {
const typed_program_node<PType>& _outer;
kernel_selector::kernel_data _kernel_data;
std::vector<kernel_id> _kernel_ids;
std::vector<kernel::ptr> _kernels;
std::vector<memory::cptr> _intermediates_memory;
typed_primitive_gpu_impl(const typed_primitive_gpu_impl<PType>& other)
typed_primitive_impl_ocl(const typed_primitive_impl_ocl<PType>& other)
: typed_primitive_impl<PType>(other._weights_reorder_params, other._kernel_name)
, _outer(other._outer)
, _kernel_data(other._kernel_data)
@ -52,7 +52,7 @@ struct typed_primitive_gpu_impl : public typed_primitive_impl<PType> {
}
}
typed_primitive_gpu_impl(const typed_program_node<PType>& arg, const kernel_selector::kernel_data& kd)
typed_primitive_impl_ocl(const typed_program_node<PType>& arg, const kernel_selector::kernel_data& kd)
: typed_primitive_impl<PType>(kd.weightsReorderParams, kd.kernelName),
_outer(arg),
_kernel_data(kd) {
@ -199,5 +199,5 @@ protected:
}
};
} // namespace gpu
} // namespace ocl
} // namespace cldnn

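As the comment in the header above notes, concrete GPU implementations derive from typed_primitive_impl_ocl<PType>. The implementation files in this commit share the same skeleton: inherit the (node, kernel_data) constructor, override clone(), and provide the static create() sketched after the gather_nd file earlier. A minimal, hypothetical example of the derived-class shape (my_prim and my_prim_impl are placeholder names, not from this commit):

    // Hypothetical sketch of a concrete OCL implementation class.
    struct my_prim_impl : typed_primitive_impl_ocl<my_prim> {
        using parent = typed_primitive_impl_ocl<my_prim>;
        using parent::parent;  // reuse the (typed_program_node, kernel_data) constructor

        // deep copy used when a program duplicates its chosen implementations
        std::unique_ptr<primitive_impl> clone() const override {
            return make_unique<my_prim_impl>(*this);
        }

        // a static create(const my_prim_node&) factory would be added here and
        // registered via implementation_map<my_prim>::add, as in the files above
    };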

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "pyramid_roi_align/pyramid_roi_align_kernel_selector.h"
#include "pyramid_roi_align/pyramid_roi_align_kernel_base.h"
@ -14,14 +14,14 @@
#include <cmath>
namespace cldnn {
namespace gpu {
namespace ocl {
struct pyramid_roi_align_gpu : typed_primitive_gpu_impl<pyramid_roi_align> {
using parent = typed_primitive_gpu_impl<pyramid_roi_align>;
struct pyramid_roi_align_impl : typed_primitive_impl_ocl<pyramid_roi_align> {
using parent = typed_primitive_impl_ocl<pyramid_roi_align>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<pyramid_roi_align_gpu>(*this);
return make_unique<pyramid_roi_align_impl>(*this);
}
static primitive_impl* create(const pyramid_roi_align_node& arg) {
@ -54,28 +54,23 @@ struct pyramid_roi_align_gpu : typed_primitive_gpu_impl<pyramid_roi_align> {
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
return new pyramid_roi_align_gpu(arg, best_kernels[0]);
return new pyramid_roi_align_impl(arg, best_kernels[0]);
}
};
namespace detail {
attach_pyramid_roi_align_gpu::attach_pyramid_roi_align_gpu() {
auto val_fw = pyramid_roi_align_gpu::create;
implementation_map<pyramid_roi_align>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
val_fw);
implementation_map<pyramid_roi_align>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
val_fw);
implementation_map<pyramid_roi_align>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
val_fw);
implementation_map<pyramid_roi_align>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
val_fw);
implementation_map<pyramid_roi_align>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
val_fw);
implementation_map<pyramid_roi_align>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
val_fw);
attach_pyramid_roi_align_impl::attach_pyramid_roi_align_impl() {
implementation_map<pyramid_roi_align>::add(impl_types::ocl, pyramid_roi_align_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f32, format::yxfb),
std::make_tuple(data_types::f32, format::byxf),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::f16, format::yxfb),
std::make_tuple(data_types::f16, format::byxf),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn


@ -0,0 +1,160 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "quantize_inst.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "quantize/quantize_kernel_selector.h"
#include "quantize/quantize_kernel_ref.h"
#include "cldnn/runtime/error_handler.hpp"
using namespace cldnn;
namespace cldnn {
namespace ocl {
struct quantize_impl : typed_primitive_impl_ocl<quantize> {
using parent = typed_primitive_impl_ocl<quantize>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<quantize_impl>(*this);
}
protected:
kernel_arguments_data get_arguments(typed_primitive_inst<quantize>& instance, int32_t) const override {
kernel_arguments_data args;
for (size_t i = 0; i < instance.inputs_memory_count(); i++) {
args.inputs.push_back(instance.input_memory_ptr(i));
}
if (instance.node.get_scale_shift_opt()) {
if (instance.node.get_dependencies().size() == 9) {
args.inputs.push_back(instance.dep_memory_ptr(5));
args.inputs.push_back(instance.dep_memory_ptr(6));
args.inputs.push_back(instance.dep_memory_ptr(7));
args.inputs.push_back(instance.dep_memory_ptr(8));
}
}
args.output = instance.output_memory_ptr();
return args;
}
public:
static primitive_impl* create(const quantize_node& arg) {
auto quantize_params = get_default_params<kernel_selector::quantize_params>(arg);
auto quantize_optional_params =
get_default_optional_params<kernel_selector::quantize_optional_params>(arg.get_program());
quantize_params.levels = arg.get_levels();
quantize_params.packed_binary_output = arg.get_packed_binary_output();
quantize_params.scale_shift_opt = arg.get_scale_shift_opt();
quantize_params.has_post_scale = arg.get_need_post_scale();
quantize_params.has_post_shift = arg.get_need_post_shift();
quantize_params.has_pre_shift = arg.get_need_pre_shift();
quantize_params.has_clamp = arg.get_need_clamp();
quantize_params.per_tensor_input_range = arg.get_per_tensor_input_range();
quantize_params.per_tensor_input_scale = arg.get_per_tensor_input_scale();
quantize_params.per_tensor_input_shift = arg.get_per_tensor_input_shift();
quantize_params.per_tensor_output_scale = arg.get_per_tensor_output_scale();
quantize_params.per_tensor_output_shift = arg.get_per_tensor_output_shift();
quantize_params.in_lo = arg.get_input_lo_val();
quantize_params.in_hi = arg.get_input_hi_val();
quantize_params.in_scale = arg.get_input_scale_val();
quantize_params.in_shift = arg.get_input_shift_val();
quantize_params.out_scale = arg.get_output_scale_val();
quantize_params.out_shift = arg.get_output_shift_val();
for (size_t i = 1; i < arg.inputs_count(); i++) {
quantize_params.inputs.push_back(convert_data_tensor(arg.input(i).get_output_layout()));
}
const auto& output_layout = arg.get_output_layout();
quantize_params.output = convert_data_tensor(output_layout);
auto& kernel_selector = kernel_selector::quantize_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(quantize_params, quantize_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto quantize = new quantize_impl(arg, best_kernels[0]);
return quantize;
}
};
namespace detail {
attach_quantize_impl::attach_quantize_impl() {
implementation_map<quantize>::add(impl_types::ocl, quantize_impl::create, {
std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),
std::make_tuple(data_types::f32, format::fs_b_yx_fsv32),
std::make_tuple(data_types::i8, format::fs_b_yx_fsv32),
std::make_tuple(data_types::u8, format::fs_b_yx_fsv32),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::f32, format::byxf),
std::make_tuple(data_types::f16, format::byxf),
std::make_tuple(data_types::u8, format::byxf),
std::make_tuple(data_types::i8, format::byxf),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv4),
std::make_tuple(data_types::f16, format::b_fs_yx_fsv4),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv32),
std::make_tuple(data_types::f16, format::b_fs_yx_fsv32),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv32),
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv32),
std::make_tuple(data_types::u8, format::b_fs_zyx_fsv32),
std::make_tuple(data_types::i8, format::b_fs_zyx_fsv32),
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::u8, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::i32, format::bfyx),
std::make_tuple(data_types::u8, format::bfyx),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::f32, format::byxf),
std::make_tuple(data_types::f16, format::byxf),
std::make_tuple(data_types::i32, format::byxf),
std::make_tuple(data_types::u8, format::byxf),
std::make_tuple(data_types::i8, format::byxf),
std::make_tuple(data_types::f32, format::yxfb),
std::make_tuple(data_types::f16, format::yxfb),
std::make_tuple(data_types::f32, format::bfzyx),
std::make_tuple(data_types::f16, format::bfzyx),
std::make_tuple(data_types::i32, format::bfzyx),
std::make_tuple(data_types::u8, format::bfzyx),
std::make_tuple(data_types::i8, format::bfzyx),
std::make_tuple(data_types::f32, format::bfwzyx),
std::make_tuple(data_types::f16, format::bfwzyx),
std::make_tuple(data_types::i32, format::bfwzyx),
std::make_tuple(data_types::u8, format::bfwzyx),
std::make_tuple(data_types::i8, format::bfwzyx),
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv16_fsv16),
});
}
} // namespace detail
} // namespace ocl
} // namespace cldnn

@ -3,8 +3,8 @@
//
#include "reduce_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "reduce/reduce_kernel_selector.h"
#include "reduce/reduce_kernel_ref.h"
@ -15,7 +15,7 @@
using namespace cldnn;
namespace cldnn {
namespace gpu {
namespace ocl {
namespace {
kernel_selector::reduce_mode cldnn_2_reduce_mode(reduce_mode mode) {
switch (mode) {
@ -49,12 +49,12 @@ kernel_selector::reduce_mode cldnn_2_reduce_mode(reduce_mode mode) {
}
}
} // namespace
struct reduce_gpu : typed_primitive_gpu_impl<reduce> {
using parent = typed_primitive_gpu_impl<reduce>;
struct reduce_impl : typed_primitive_impl_ocl<reduce> {
using parent = typed_primitive_impl_ocl<reduce>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<reduce_gpu>(*this);
return make_unique<reduce_impl>(*this);
}
public:
@ -71,7 +71,7 @@ public:
CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");
auto reduce = new reduce_gpu(arg, best_kernels[0]);
auto reduce = new reduce_impl(arg, best_kernels[0]);
return reduce;
}
@ -79,30 +79,31 @@ public:
namespace detail {
attach_reduce_gpu::attach_reduce_gpu() {
auto val_fw = reduce_gpu::create;
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw);
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw);
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw);
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_yx_fsv16), val_fw);
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
attach_reduce_impl::attach_reduce_impl() {
implementation_map<reduce>::add(impl_types::ocl, reduce_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::i32, format::bfyx),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::u8, format::bfyx),
std::make_tuple(data_types::f32, format::bfzyx),
std::make_tuple(data_types::f16, format::bfzyx),
std::make_tuple(data_types::i32, format::bfzyx),
std::make_tuple(data_types::i8, format::bfzyx),
std::make_tuple(data_types::u8, format::bfzyx),
std::make_tuple(data_types::f32, format::bfwzyx),
std::make_tuple(data_types::f16, format::bfwzyx),
std::make_tuple(data_types::i32, format::bfwzyx),
std::make_tuple(data_types::i8, format::bfwzyx),
std::make_tuple(data_types::u8, format::bfwzyx),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
std::make_tuple(data_types::i32, format::b_fs_yx_fsv16),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn

@ -3,22 +3,22 @@
//
#include "region_yolo_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "region_yolo/region_yolo_kernel_selector.h"
#include "region_yolo/region_yolo_kernel_ref.h"
#include "cldnn/runtime/error_handler.hpp"
namespace cldnn {
namespace gpu {
namespace ocl {
struct region_yolo_gpu : typed_primitive_gpu_impl<region_yolo> {
using parent = typed_primitive_gpu_impl<region_yolo>;
struct region_yolo_impl : typed_primitive_impl_ocl<region_yolo> {
using parent = typed_primitive_impl_ocl<region_yolo>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<region_yolo_gpu>(*this);
return make_unique<region_yolo_impl>(*this);
}
static primitive_impl* create(const region_yolo_node& arg) {
@ -41,7 +41,7 @@ struct region_yolo_gpu : typed_primitive_gpu_impl<region_yolo> {
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto region_yolo_node = new region_yolo_gpu(arg, best_kernels[0]);
auto region_yolo_node = new region_yolo_impl(arg, best_kernels[0]);
return region_yolo_node;
}
@ -49,14 +49,15 @@ struct region_yolo_gpu : typed_primitive_gpu_impl<region_yolo> {
namespace detail {
attach_region_yolo_gpu::attach_region_yolo_gpu() {
implementation_map<region_yolo>::add(
{{std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), region_yolo_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), region_yolo_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), region_yolo_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), region_yolo_gpu::create}});
attach_region_yolo_impl::attach_region_yolo_impl() {
implementation_map<region_yolo>::add(impl_types::ocl, region_yolo_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::f32, format::byxf),
std::make_tuple(data_types::f16, format::byxf),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn

@ -0,0 +1,79 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "register.hpp"
namespace cldnn {
namespace ocl {
#define REGISTER_OCL(prim) \
static detail::attach_##prim##_impl attach_##prim
void register_implementations() {
REGISTER_OCL(activation);
REGISTER_OCL(arg_max_min);
REGISTER_OCL(average_unpooling);
REGISTER_OCL(binary_convolution);
REGISTER_OCL(border);
REGISTER_OCL(broadcast);
REGISTER_OCL(concatenation);
REGISTER_OCL(convolution);
REGISTER_OCL(crop);
REGISTER_OCL(custom_gpu_primitive);
REGISTER_OCL(deconvolution);
REGISTER_OCL(deformable_conv);
REGISTER_OCL(deformable_interp);
REGISTER_OCL(depth_to_space);
REGISTER_OCL(batch_to_space);
REGISTER_OCL(eltwise);
REGISTER_OCL(fully_connected);
REGISTER_OCL(gather);
REGISTER_OCL(gather_nd);
REGISTER_OCL(gemm);
REGISTER_OCL(lrn);
REGISTER_OCL(lstm_gemm);
REGISTER_OCL(lstm_elt);
REGISTER_OCL(max_unpooling);
REGISTER_OCL(mutable_data);
REGISTER_OCL(mvn);
REGISTER_OCL(normalize);
REGISTER_OCL(one_hot);
REGISTER_OCL(permute);
REGISTER_OCL(pooling);
REGISTER_OCL(pyramid_roi_align);
REGISTER_OCL(quantize);
REGISTER_OCL(reduce);
REGISTER_OCL(region_yolo);
REGISTER_OCL(reorder);
REGISTER_OCL(reorg_yolo);
REGISTER_OCL(reshape);
REGISTER_OCL(reverse_sequence);
REGISTER_OCL(roi_pooling);
REGISTER_OCL(scale);
REGISTER_OCL(scatter_update);
REGISTER_OCL(scatter_nd_update);
REGISTER_OCL(scatter_elements_update);
REGISTER_OCL(select);
REGISTER_OCL(shuffle_channels);
REGISTER_OCL(softmax);
REGISTER_OCL(space_to_batch);
REGISTER_OCL(space_to_depth);
REGISTER_OCL(strided_slice);
REGISTER_OCL(tile);
REGISTER_OCL(fused_conv_eltwise);
REGISTER_OCL(lstm_dynamic_input);
REGISTER_OCL(lstm_dynamic_timeloop);
REGISTER_OCL(generic_layer);
REGISTER_OCL(gather_tree);
REGISTER_OCL(resample);
REGISTER_OCL(grn);
REGISTER_OCL(ctc_greedy_decoder);
REGISTER_OCL(cum_sum);
REGISTER_OCL(embedding_bag);
REGISTER_OCL(extract_image_patches);
}
} // namespace ocl
} // namespace cldnn
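A note on the mechanism above, inferred from what this diff shows: REGISTER_OCL(prim) declares a function-local static detail::attach_<prim>_impl object inside register_implementations(), so each attach constructor (defined in the corresponding primitive's file, e.g. attach_quantize_impl earlier in this diff) runs exactly once, on the first call to register_implementations(), and performs the implementation_map registration. A minimal expansion sketch for one primitive that is registered in this commit:

// Inside register_implementations(), REGISTER_OCL(quantize) expands to:
static detail::attach_quantize_impl attach_quantize;
// Its constructor, defined in the quantize implementation shown earlier,
// adds quantize_impl::create to implementation_map<quantize> for impl_types::ocl.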

@ -13,20 +13,16 @@
#include "cldnn/primitives/border.hpp"
#include "cldnn/primitives/broadcast.hpp"
#include "cldnn/primitives/concatenation.hpp"
#include "cldnn/primitives/condition.hpp"
#include "cldnn/primitives/convolution.hpp"
#include "cldnn/primitives/crop.hpp"
#include "cldnn/primitives/custom_gpu_primitive.hpp"
#include "cldnn/primitives/data.hpp"
#include "cldnn/primitives/deconvolution.hpp"
#include "cldnn/primitives/depth_to_space.hpp"
#include "cldnn/primitives/detection_output.hpp"
#include "cldnn/primitives/eltwise.hpp"
#include "cldnn/primitives/fully_connected.hpp"
#include "cldnn/primitives/gather.hpp"
#include "cldnn/primitives/gather_nd.hpp"
#include "cldnn/primitives/gemm.hpp"
#include "cldnn/primitives/input_layout.hpp"
#include "cldnn/primitives/lrn.hpp"
#include "cldnn/primitives/lstm.hpp"
#include "cldnn/primitives/lstm_dynamic.hpp"
@ -37,8 +33,6 @@
#include "cldnn/primitives/one_hot.hpp"
#include "cldnn/primitives/permute.hpp"
#include "cldnn/primitives/pooling.hpp"
#include "cldnn/primitives/prior_box.hpp"
#include "cldnn/primitives/proposal.hpp"
#include "cldnn/primitives/pyramid_roi_align.hpp"
#include "cldnn/primitives/quantize.hpp"
#include "cldnn/primitives/reduce.hpp"
@ -63,97 +57,88 @@
#include "cldnn/primitives/fused_conv_eltwise.hpp"
#include "cldnn/primitives/lstm_dynamic_input.hpp"
#include "cldnn/primitives/lstm_dynamic_timeloop.hpp"
#include "cldnn/primitives/non_max_suppression.hpp"
#include "cldnn/primitives/grn.hpp"
#include "cldnn/primitives/ctc_greedy_decoder.hpp"
#include "cldnn/primitives/loop.hpp"
#include "generic_layer.hpp"
namespace cldnn { namespace gpu {
void register_implementations_gpu();
namespace cldnn {
namespace ocl {
void register_implementations();
namespace detail {
#define REGISTER_GPU(prim) \
struct attach_##prim##_gpu { \
attach_##prim##_gpu(); \
#define REGISTER_OCL(prim) \
struct attach_##prim##_impl { \
attach_##prim##_impl(); \
}
REGISTER_GPU(activation);
REGISTER_GPU(arg_max_min);
REGISTER_GPU(average_unpooling);
REGISTER_GPU(batch_to_space);
REGISTER_GPU(binary_convolution);
REGISTER_GPU(border);
REGISTER_GPU(broadcast);
REGISTER_GPU(concatenation);
REGISTER_GPU(condition);
REGISTER_GPU(convolution);
REGISTER_GPU(crop);
REGISTER_GPU(custom_gpu_primitive);
REGISTER_GPU(data);
REGISTER_GPU(deconvolution);
REGISTER_GPU(deformable_conv);
REGISTER_GPU(deformable_interp);
REGISTER_GPU(depth_to_space);
REGISTER_GPU(detection_output);
REGISTER_GPU(eltwise);
REGISTER_GPU(embed);
REGISTER_GPU(fully_connected);
REGISTER_GPU(gather);
REGISTER_GPU(gather_nd);
REGISTER_GPU(gemm);
REGISTER_GPU(input_layout);
REGISTER_GPU(lookup_table);
REGISTER_GPU(lrn);
REGISTER_GPU(lstm_gemm);
REGISTER_GPU(lstm_elt);
REGISTER_GPU(max_unpooling);
REGISTER_GPU(mutable_data);
REGISTER_GPU(mvn);
REGISTER_GPU(normalize);
REGISTER_GPU(one_hot);
REGISTER_GPU(permute);
REGISTER_GPU(pooling);
REGISTER_GPU(prior_box);
REGISTER_GPU(proposal);
REGISTER_GPU(pyramid_roi_align);
REGISTER_GPU(quantize);
REGISTER_GPU(reduce);
REGISTER_GPU(region_yolo);
REGISTER_GPU(reorder);
REGISTER_GPU(reorg_yolo);
REGISTER_GPU(reshape);
REGISTER_GPU(reverse_sequence);
REGISTER_GPU(roi_pooling);
REGISTER_GPU(scale);
REGISTER_GPU(scatter_update);
REGISTER_GPU(scatter_elements_update);
REGISTER_GPU(scatter_nd_update);
REGISTER_GPU(select);
REGISTER_GPU(shuffle_channels);
REGISTER_GPU(softmax);
REGISTER_GPU(space_to_batch);
REGISTER_GPU(space_to_depth);
REGISTER_GPU(strided_slice);
REGISTER_GPU(tile);
REGISTER_GPU(fused_conv_eltwise);
REGISTER_GPU(lstm_dynamic_input);
REGISTER_GPU(lstm_dynamic_timeloop);
REGISTER_GPU(generic_layer);
REGISTER_GPU(gather_tree);
REGISTER_GPU(resample);
REGISTER_GPU(non_max_suppression);
REGISTER_GPU(grn);
REGISTER_GPU(ctc_greedy_decoder);
REGISTER_GPU(cum_sum);
REGISTER_GPU(embedding_bag);
REGISTER_GPU(extract_image_patches);
REGISTER_GPU(loop);
REGISTER_OCL(activation);
REGISTER_OCL(arg_max_min);
REGISTER_OCL(average_unpooling);
REGISTER_OCL(batch_to_space);
REGISTER_OCL(binary_convolution);
REGISTER_OCL(border);
REGISTER_OCL(broadcast);
REGISTER_OCL(concatenation);
REGISTER_OCL(convolution);
REGISTER_OCL(crop);
REGISTER_OCL(custom_gpu_primitive);
REGISTER_OCL(data);
REGISTER_OCL(deconvolution);
REGISTER_OCL(deformable_conv);
REGISTER_OCL(deformable_interp);
REGISTER_OCL(depth_to_space);
REGISTER_OCL(eltwise);
REGISTER_OCL(embed);
REGISTER_OCL(fully_connected);
REGISTER_OCL(gather);
REGISTER_OCL(gather_nd);
REGISTER_OCL(gemm);
REGISTER_OCL(lrn);
REGISTER_OCL(lstm_gemm);
REGISTER_OCL(lstm_elt);
REGISTER_OCL(max_unpooling);
REGISTER_OCL(mutable_data);
REGISTER_OCL(mvn);
REGISTER_OCL(normalize);
REGISTER_OCL(one_hot);
REGISTER_OCL(permute);
REGISTER_OCL(pooling);
REGISTER_OCL(pyramid_roi_align);
REGISTER_OCL(quantize);
REGISTER_OCL(reduce);
REGISTER_OCL(region_yolo);
REGISTER_OCL(reorder);
REGISTER_OCL(reorg_yolo);
REGISTER_OCL(reshape);
REGISTER_OCL(reverse_sequence);
REGISTER_OCL(roi_pooling);
REGISTER_OCL(scale);
REGISTER_OCL(scatter_update);
REGISTER_OCL(scatter_elements_update);
REGISTER_OCL(scatter_nd_update);
REGISTER_OCL(select);
REGISTER_OCL(shuffle_channels);
REGISTER_OCL(softmax);
REGISTER_OCL(space_to_batch);
REGISTER_OCL(space_to_depth);
REGISTER_OCL(strided_slice);
REGISTER_OCL(tile);
REGISTER_OCL(fused_conv_eltwise);
REGISTER_OCL(lstm_dynamic_input);
REGISTER_OCL(lstm_dynamic_timeloop);
REGISTER_OCL(generic_layer);
REGISTER_OCL(gather_tree);
REGISTER_OCL(resample);
REGISTER_OCL(grn);
REGISTER_OCL(ctc_greedy_decoder);
REGISTER_OCL(cum_sum);
REGISTER_OCL(embedding_bag);
REGISTER_OCL(extract_image_patches);
#undef REGISTER_GPU
#undef REGISTER_OCL
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn

@ -3,22 +3,22 @@
//
#include "reorder_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "reorder/reorder_kernel_selector.h"
#include "reorder/reorder_kernel_base.h"
#include "cldnn/runtime/error_handler.hpp"
namespace cldnn {
namespace gpu {
namespace ocl {
struct reorder_gpu : typed_primitive_gpu_impl<reorder> {
using parent = typed_primitive_gpu_impl<reorder>;
struct reorder_impl : typed_primitive_impl_ocl<reorder> {
using parent = typed_primitive_impl_ocl<reorder>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<reorder_gpu>(*this);
return make_unique<reorder_impl>(*this);
}
protected:
@ -108,7 +108,7 @@ public:
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto reorder = new reorder_gpu(arg, best_kernels[0]);
auto reorder = new reorder_impl(arg, best_kernels[0]);
return reorder;
}
@ -116,10 +116,10 @@ public:
namespace detail {
attach_reorder_gpu::attach_reorder_gpu() {
implementation_map<reorder>::add({{engine_types::ocl, reorder_gpu::create}});
attach_reorder_impl::attach_reorder_impl() {
implementation_map<reorder>::add(impl_types::ocl, reorder_impl::create, {});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn

@ -3,22 +3,22 @@
//
#include "reorg_yolo_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "reorg_yolo/reorg_yolo_kernel_selector.h"
#include "reorg_yolo/reorg_yolo_kernel_ref.h"
#include "cldnn/runtime/error_handler.hpp"
namespace cldnn {
namespace gpu {
namespace ocl {
struct reorg_yolo_gpu : typed_primitive_gpu_impl<reorg_yolo> {
using parent = typed_primitive_gpu_impl<reorg_yolo>;
struct reorg_yolo_impl : typed_primitive_impl_ocl<reorg_yolo> {
using parent = typed_primitive_impl_ocl<reorg_yolo>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<reorg_yolo_gpu>(*this);
return make_unique<reorg_yolo_impl>(*this);
}
static primitive_impl* create(const reorg_yolo_node& arg) {
@ -38,7 +38,7 @@ struct reorg_yolo_gpu : typed_primitive_gpu_impl<reorg_yolo> {
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto reorg_yolo_node = new reorg_yolo_gpu(arg, best_kernels[0]);
auto reorg_yolo_node = new reorg_yolo_impl(arg, best_kernels[0]);
return reorg_yolo_node;
}
@ -46,16 +46,17 @@ struct reorg_yolo_gpu : typed_primitive_gpu_impl<reorg_yolo> {
namespace detail {
attach_reorg_yolo_gpu::attach_reorg_yolo_gpu() {
auto val_fw = reorg_yolo_gpu::create;
implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
attach_reorg_yolo_impl::attach_reorg_yolo_impl() {
implementation_map<reorg_yolo>::add(impl_types::ocl, reorg_yolo_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::f32, format::yxfb),
std::make_tuple(data_types::f16, format::yxfb),
std::make_tuple(data_types::f32, format::byxf),
std::make_tuple(data_types::f16, format::byxf),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn

@ -3,15 +3,15 @@
//
#include "resample_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "kernel_selector/core/actual_kernels/resample/resample_kernel_selector.h"
#include "kernel_selector/core/actual_kernels/resample/resample_kernel_base.h"
namespace cldnn {
namespace gpu {
namespace ocl {
namespace {
inline kernel_selector::sample_type convert_to_sample_type(resample_type type) {
@ -96,12 +96,12 @@ inline kernel_selector::interpolate_axis convert_axis(resample::resample_axis ax
}
} // namespace
struct resample_gpu : typed_primitive_gpu_impl<resample> {
using parent = typed_primitive_gpu_impl<resample>;
struct resample_impl : typed_primitive_impl_ocl<resample> {
using parent = typed_primitive_impl_ocl<resample>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<resample_gpu>(*this);
return make_unique<resample_impl>(*this);
}
static primitive_impl* create(const resample_node& arg) {
@ -135,7 +135,7 @@ struct resample_gpu : typed_primitive_gpu_impl<resample> {
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto resample = new resample_gpu(arg, best_kernels[0]);
auto resample = new resample_impl(arg, best_kernels[0]);
return resample;
}
@ -143,31 +143,32 @@ struct resample_gpu : typed_primitive_gpu_impl<resample> {
namespace detail {
attach_resample_gpu::attach_resample_gpu() {
implementation_map<resample>::add(
{{std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv4), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), resample_gpu::create}});
attach_resample_impl::attach_resample_impl() {
implementation_map<resample>::add(impl_types::ocl, resample_impl::create, {
std::make_tuple(data_types::f32, format::yxfb),
std::make_tuple(data_types::f16, format::yxfb),
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),
std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
std::make_tuple(data_types::f32, format::byxf),
std::make_tuple(data_types::f16, format::byxf),
std::make_tuple(data_types::f32, format::bfzyx),
std::make_tuple(data_types::f16, format::bfzyx),
std::make_tuple(data_types::u8, format::bfyx),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::u8, format::bfzyx),
std::make_tuple(data_types::i8, format::bfzyx),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv4),
std::make_tuple(data_types::f16, format::b_fs_yx_fsv4),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn

@ -3,28 +3,28 @@
//
#include "reshape_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "reshape/reshape_kernel_ref.h"
#include "reshape/reshape_kernel_selector.h"
#include "cldnn/runtime/error_handler.hpp"
namespace cldnn {
namespace gpu {
namespace ocl {
struct reshape_gpu : public typed_primitive_gpu_impl<reshape> {
using parent = typed_primitive_gpu_impl<reshape>;
struct reshape_impl : public typed_primitive_impl_ocl<reshape> {
using parent = typed_primitive_impl_ocl<reshape>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<reshape_gpu>(*this);
return make_unique<reshape_impl>(*this);
}
public:
static primitive_impl* create(reshape_node const& arg) {
if (arg.can_be_optimized()) {
return new reshape_gpu(arg, {});
return new reshape_impl(arg, {});
}
auto reorder_params = get_default_params<kernel_selector::reshape_params>(arg);
@ -39,7 +39,7 @@ public:
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto reshape = new reshape_gpu(arg, best_kernels[0]);
auto reshape = new reshape_impl(arg, best_kernels[0]);
return reshape;
}
@ -47,10 +47,10 @@ public:
namespace detail {
attach_reshape_gpu::attach_reshape_gpu() {
implementation_map<reshape>::add({{engine_types::ocl, reshape_gpu::create}});
attach_reshape_impl::attach_reshape_impl() {
implementation_map<reshape>::add(impl_types::ocl, reshape_impl::create, {});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn

@ -3,8 +3,8 @@
//
#include "reverse_sequence_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "reverse_sequence/reverse_sequence_kernel_selector.h"
#include "reverse_sequence/reverse_sequence_kernel_ref.h"
@ -13,13 +13,13 @@
using namespace cldnn;
namespace cldnn {
namespace gpu {
struct reverse_sequence_gpu : typed_primitive_gpu_impl<reverse_sequence> {
using parent = typed_primitive_gpu_impl<reverse_sequence>;
namespace ocl {
struct reverse_sequence_impl : typed_primitive_impl_ocl<reverse_sequence> {
using parent = typed_primitive_impl_ocl<reverse_sequence>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<reverse_sequence_gpu>(*this);
return make_unique<reverse_sequence_impl>(*this);
}
public:
@ -41,7 +41,7 @@ public:
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto reverse_sequence = new reverse_sequence_gpu(arg, best_kernels[0]);
auto reverse_sequence = new reverse_sequence_impl(arg, best_kernels[0]);
return reverse_sequence;
}
@ -49,15 +49,16 @@ public:
namespace detail {
attach_reverse_sequence_gpu::attach_reverse_sequence_gpu() {
auto val_fw = reverse_sequence_gpu::create;
implementation_map<reverse_sequence>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
implementation_map<reverse_sequence>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
implementation_map<reverse_sequence>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
implementation_map<reverse_sequence>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
implementation_map<reverse_sequence>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
attach_reverse_sequence_impl::attach_reverse_sequence_impl() {
implementation_map<reverse_sequence>::add(impl_types::ocl, reverse_sequence_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::i32, format::bfyx),
std::make_tuple(data_types::u8, format::bfyx),
std::make_tuple(data_types::i8, format::bfyx),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn

@ -3,15 +3,15 @@
//
#include "roi_pooling_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "cldnn/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "roi_pooling/roi_pooling_kernel_selector.h"
#include "roi_pooling/roi_pooling_kernel_ref.h"
namespace cldnn {
namespace gpu {
namespace ocl {
namespace {
kernel_selector::pool_type cldnn_2_pool_type(pooling_mode mode) {
@ -33,12 +33,12 @@ kernel_selector::pool_type cldnn_2_pool_type(pooling_mode mode) {
}
} // namespace
struct roi_pooling_gpu : typed_primitive_gpu_impl<roi_pooling> {
using parent = typed_primitive_gpu_impl<roi_pooling>;
struct roi_pooling_impl : typed_primitive_impl_ocl<roi_pooling> {
using parent = typed_primitive_impl_ocl<roi_pooling>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<roi_pooling_gpu>(*this);
return make_unique<roi_pooling_impl>(*this);
}
protected:
@ -108,7 +108,7 @@ public:
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto roi_pool = new roi_pooling_gpu(arg, best_kernels[0]);
auto roi_pool = new roi_pooling_impl(arg, best_kernels[0]);
return roi_pool;
}
@ -116,13 +116,13 @@ public:
namespace detail {
attach_roi_pooling_gpu::attach_roi_pooling_gpu() {
implementation_map<roi_pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
roi_pooling_gpu::create);
implementation_map<roi_pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
roi_pooling_gpu::create);
attach_roi_pooling_impl::attach_roi_pooling_impl() {
implementation_map<roi_pooling>::add(impl_types::ocl, roi_pooling_impl::create, {
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::f32, format::bfyx),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn

@ -0,0 +1,136 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "scale_inst.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "eltwise/eltwise_kernel_selector.h"
#include "eltwise/eltwise_kernel_base.h"
#include "cldnn/runtime/error_handler.hpp"
using namespace cldnn;
namespace cldnn {
namespace ocl {
struct scale_impl : typed_primitive_impl_ocl<scale> {
using parent = typed_primitive_impl_ocl<scale>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<scale_impl>(*this);
}
protected:
kernel_arguments_data get_arguments(typed_primitive_inst<scale>& instance, int32_t split) const override {
kernel_arguments_data args = parent::get_arguments(instance, split);
args.inputs = {instance.input_memory_ptr(), instance.scale_memory()};
args.output = instance.output_memory_ptr();
if (_outer.bias_term()) {
args.inputs.push_back(instance.bias_memory());
}
return args;
}
public:
static primitive_impl* create(const scale_node& arg) {
auto ew_params = get_default_params<kernel_selector::eltwise_params>(arg);
auto ew_optional_params =
get_default_optional_params<kernel_selector::eltwise_optional_params>(arg.get_program());
ew_params.inputs.push_back(convert_data_tensor(arg.scale_in().get_output_layout()));
ew_params.operations.push_back({{kernel_selector::eltwise_params::InputType::Buffer(0),
kernel_selector::eltwise_params::InputType::Buffer(1)},
kernel_selector::eltwise_mode::MUL});
if (arg.bias_term()) {
ew_params.inputs.push_back(convert_data_tensor(arg.bias().get_output_layout()));
ew_params.operations.push_back({{kernel_selector::eltwise_params::InputType::Intermediate(0),
kernel_selector::eltwise_params::InputType::Buffer(2)},
kernel_selector::eltwise_mode::ADD});
}
ew_params.layoutBased = true;
auto& kernel_selector = kernel_selector::eltwise_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(ew_params, ew_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto scale = new scale_impl(arg, best_kernels[0]);
return scale;
}
};
namespace detail {
attach_scale_impl::attach_scale_impl() {
implementation_map<scale>::add(impl_types::ocl, scale_impl::create, {
std::make_tuple(data_types::f32, format::yxfb),
std::make_tuple(data_types::f16, format::yxfb),
std::make_tuple(data_types::i32, format::yxfb),
std::make_tuple(data_types::f32, format::byxf),
std::make_tuple(data_types::f16, format::byxf),
std::make_tuple(data_types::i32, format::byxf),
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::i32, format::bfyx),
std::make_tuple(data_types::u8, format::bfyx),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::f32, format::bfzyx),
std::make_tuple(data_types::f16, format::bfzyx),
std::make_tuple(data_types::i32, format::bfzyx),
std::make_tuple(data_types::u8, format::bfzyx),
std::make_tuple(data_types::i8, format::bfzyx),
std::make_tuple(data_types::f32, format::bfwzyx),
std::make_tuple(data_types::f16, format::bfwzyx),
std::make_tuple(data_types::i32, format::bfwzyx),
std::make_tuple(data_types::u8, format::bfwzyx),
std::make_tuple(data_types::i8, format::bfwzyx),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
std::make_tuple(data_types::i32, format::b_fs_yx_fsv16),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::i32, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::i32, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),
std::make_tuple(data_types::i32, format::fs_b_yx_fsv32),
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::i32, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
std::make_tuple(data_types::f16, format::b_fs_yx_fsv4),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv4),
std::make_tuple(data_types::i32, format::b_fs_yx_fsv4),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
std::make_tuple(data_types::f16, format::b_fs_yx_fsv32),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv32),
std::make_tuple(data_types::i32, format::b_fs_yx_fsv32),
std::make_tuple(data_types::u8, format::b_fs_zyx_fsv32),
std::make_tuple(data_types::i8, format::b_fs_zyx_fsv32),
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv32),
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv32),
std::make_tuple(data_types::i32, format::b_fs_zyx_fsv32),
});
}
} // namespace detail
} // namespace ocl
} // namespace cldnn

@ -3,8 +3,8 @@
//
#include "scatter_elements_update_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "scatter_update/scatter_elements_update_kernel_selector.h"
#include "scatter_update/scatter_elements_update_kernel_ref.h"
@ -13,7 +13,7 @@
using namespace cldnn;
namespace cldnn {
namespace gpu {
namespace ocl {
kernel_selector::scatter_update_axis convert_axis(scatter_elements_update::scatter_elements_update_axis axis, const scatter_elements_update_node& arg) {
switch (axis) {
case scatter_elements_update::along_x:
@ -34,12 +34,12 @@ kernel_selector::scatter_update_axis convert_axis(scatter_elements_update::scatt
return kernel_selector::scatter_update_axis::X;
}
struct scatter_elements_update_gpu : typed_primitive_gpu_impl<scatter_elements_update> {
using parent = typed_primitive_gpu_impl<scatter_elements_update>;
struct scatter_elements_update_impl : typed_primitive_impl_ocl<scatter_elements_update> {
using parent = typed_primitive_impl_ocl<scatter_elements_update>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<scatter_elements_update_gpu>(*this);
return make_unique<scatter_elements_update_impl>(*this);
}
public:
@ -61,7 +61,7 @@ public:
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto scatter_elements_update = new scatter_elements_update_gpu(arg, best_kernels[0]);
auto scatter_elements_update = new scatter_elements_update_impl(arg, best_kernels[0]);
return scatter_elements_update;
}
@ -69,21 +69,20 @@ public:
namespace detail {
attach_scatter_elements_update_gpu::attach_scatter_elements_update_gpu() {
auto val_fw = scatter_elements_update_gpu::create;
implementation_map<scatter_elements_update>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
implementation_map<scatter_elements_update>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
implementation_map<scatter_elements_update>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
implementation_map<scatter_elements_update>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
implementation_map<scatter_elements_update>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
implementation_map<scatter_elements_update>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
implementation_map<scatter_elements_update>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
implementation_map<scatter_elements_update>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
implementation_map<scatter_elements_update>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw);
attach_scatter_elements_update_impl::attach_scatter_elements_update_impl() {
implementation_map<scatter_elements_update>::add(impl_types::ocl, scatter_elements_update_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::i32, format::bfyx),
std::make_tuple(data_types::f32, format::bfzyx),
std::make_tuple(data_types::f16, format::bfzyx),
std::make_tuple(data_types::i32, format::bfzyx),
std::make_tuple(data_types::f32, format::bfwzyx),
std::make_tuple(data_types::f16, format::bfwzyx),
std::make_tuple(data_types::i32, format::bfwzyx),
});
}
} // namespace detail
} // namespace gpu
} // namespace ocl
} // namespace cldnn
