[GPU] (I)Dft with single layer test (#9891)
* dft with single layer test * idft with single layer test * fix output param usage in dft * update dft according to the clang-format * move output layout setup to calc_output_layout * add support for other dimensions * add clDNN unit test for DFT/IDFT * remove unnecessary original rank * use defined formats in kernel * fix dft docs * changes after review * Revert "fix dft docs" This reverts commit 45b05172dfd161d92dae6d26e0f1b74748e56fd5. Co-authored-by: Serhii Pavlovskyi <spavlovskyi@lohika.com> Co-authored-by: Mykhailo Hnap <mhnap@lohika.com>
This commit is contained in:
committed by
GitHub
parent
e767e9e243
commit
4b08ce4787
@@ -211,8 +211,10 @@ REGISTER_FACTORY(v6, ExperimentalDetectronTopKROIs)
|
||||
REGISTER_FACTORY(v6, ExperimentalDetectronGenerateProposalsSingleImage);
|
||||
|
||||
// ------------------------------ Supported v7 ops ------------------------------ //
|
||||
REGISTER_FACTORY(v7, DFT);
|
||||
REGISTER_FACTORY(v7, Gather);
|
||||
REGISTER_FACTORY(v7, Gelu);
|
||||
REGISTER_FACTORY(v7, IDFT);
|
||||
|
||||
// ------------------------------ Supported v8 ops ------------------------------ //
|
||||
REGISTER_FACTORY(v8, Slice);
|
||||
|
||||
52
src/plugins/intel_gpu/include/intel_gpu/primitives/dft.hpp
Normal file
52
src/plugins/intel_gpu/include/intel_gpu/primitives/dft.hpp
Normal file
@@ -0,0 +1,52 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <openvino/core/shape.hpp>
|
||||
|
||||
#include "primitive.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
/// @addtogroup cpp_api C++ API
|
||||
/// @{
|
||||
/// @addtogroup cpp_topology Network Topology
|
||||
/// @{
|
||||
/// @addtogroup cpp_primitives Primitives
|
||||
/// @{
|
||||
|
||||
/// @brief Kind of DFT operation.
|
||||
enum class dft_kind {
|
||||
forward,
|
||||
inverse,
|
||||
};
|
||||
|
||||
/// @brief DFT primitive.
|
||||
struct dft : public primitive_base<dft> {
|
||||
CLDNN_DECLARE_PRIMITIVE(dft)
|
||||
|
||||
/// @brief Constructs DFT primitive.
|
||||
/// @param id This primitive id.
|
||||
/// @param input Input primitive id.
|
||||
/// @param axes Axes to perform DFT.
|
||||
/// @param output_shape Output shape.
|
||||
/// @param kind Kind of DFT operation.
|
||||
dft(const primitive_id& id,
|
||||
const primitive_id& input,
|
||||
std::vector<int64_t>&& axes,
|
||||
const ov::Shape& output_shape,
|
||||
dft_kind kind,
|
||||
const primitive_id& ext_prim_id = {},
|
||||
const padding& output_padding = {})
|
||||
: primitive_base(id, {input}, ext_prim_id, output_padding),
|
||||
axes(std::move(axes)),
|
||||
output_shape(output_shape),
|
||||
kind(kind) {}
|
||||
|
||||
std::vector<int64_t> axes;
|
||||
ov::Shape output_shape;
|
||||
dft_kind kind;
|
||||
};
|
||||
|
||||
} // namespace cldnn
|
||||
40
src/plugins/intel_gpu/src/graph/dft.cpp
Normal file
40
src/plugins/intel_gpu/src/graph/dft.cpp
Normal file
@@ -0,0 +1,40 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <dft_inst.h>
|
||||
#include <primitive_type_base.h>
|
||||
|
||||
#include "json_object.h"
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
primitive_type_id dft::type_id() {
|
||||
static primitive_type_base<dft> instance;
|
||||
return &instance;
|
||||
}
|
||||
|
||||
layout typed_primitive_inst<dft>::calc_output_layout(const dft_node& node) {
|
||||
auto primitive = node.get_primitive();
|
||||
auto input_layout = node.input().get_output_layout();
|
||||
|
||||
std::vector<tensor::value_type> dims_converted(primitive->output_shape.begin(), primitive->output_shape.end());
|
||||
auto output_format = input_layout.format;
|
||||
|
||||
// Extend shape to 4d by pushing ones before the last dim
|
||||
for (auto i = dims_converted.size(); i < 4; ++i) {
|
||||
dims_converted.insert(std::prev(dims_converted.end()), 1);
|
||||
}
|
||||
|
||||
return {input_layout.data_type, output_format, tensor(output_format, dims_converted)};
|
||||
}
|
||||
|
||||
std::string typed_primitive_inst<dft>::to_string(const dft_node& node) {
|
||||
auto desc = node.get_primitive();
|
||||
auto node_info = node.desc_to_json();
|
||||
std::ostringstream os;
|
||||
node_info->dump(os);
|
||||
return os.str();
|
||||
}
|
||||
|
||||
} // namespace cldnn
|
||||
63
src/plugins/intel_gpu/src/graph/impls/ocl/dft.cpp
Normal file
63
src/plugins/intel_gpu/src/graph/impls/ocl/dft.cpp
Normal file
@@ -0,0 +1,63 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <dft/dft_kernel_ref.h>
|
||||
#include <dft/dft_kernel_selector.h>
|
||||
#include <dft_inst.h>
|
||||
#include <kernel_selector_helper.h>
|
||||
|
||||
#include <impls/implementation_map.hpp>
|
||||
#include <intel_gpu/runtime/error_handler.hpp>
|
||||
|
||||
#include "primitive_base.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
|
||||
struct dft_impl : typed_primitive_impl_ocl<dft> {
|
||||
using typed_primitive_impl_ocl::typed_primitive_impl_ocl;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<dft_impl>(*this);
|
||||
}
|
||||
|
||||
static primitive_impl* create(const dft_node& arg) {
|
||||
auto params = get_default_params<kernel_selector::dft_params>(arg);
|
||||
auto primitive = arg.get_primitive();
|
||||
params.axes = primitive->axes;
|
||||
if (primitive->kind == dft_kind::inverse) {
|
||||
params.kind = kernel_selector::dft_params::inverse;
|
||||
}
|
||||
auto optional_params = get_default_optional_params<kernel_selector::dft_optional_params>(arg.get_program());
|
||||
|
||||
auto& kernel_selector = kernel_selector::dft_kernel_selector::Instance();
|
||||
auto best_kernels = kernel_selector.GetBestKernels(params, optional_params);
|
||||
|
||||
CLDNN_ERROR_BOOL(arg.id(),
|
||||
"Best_kernel.empty()",
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
return new dft_impl{arg, best_kernels.front()};
|
||||
}
|
||||
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_dft_impl::attach_dft_impl() {
|
||||
implementation_map<dft>::add(impl_types::ocl,
|
||||
dft_impl::create,
|
||||
{
|
||||
std::make_tuple(data_types::f16, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::bfzyx),
|
||||
std::make_tuple(data_types::f16, format::bfwzyx),
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f32, format::bfzyx),
|
||||
std::make_tuple(data_types::f32, format::bfwzyx),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
||||
@@ -28,6 +28,7 @@ void register_implementations() {
|
||||
REGISTER_OCL(deformable_interp);
|
||||
REGISTER_OCL(depth_to_space);
|
||||
REGISTER_OCL(detection_output);
|
||||
REGISTER_OCL(dft);
|
||||
REGISTER_OCL(batch_to_space);
|
||||
REGISTER_OCL(experimental_detectron_generate_proposals_single_image);
|
||||
REGISTER_OCL(experimental_detectron_roi_feature_extractor);
|
||||
|
||||
@@ -101,6 +101,7 @@ REGISTER_OCL(deformable_conv);
|
||||
REGISTER_OCL(deformable_interp);
|
||||
REGISTER_OCL(depth_to_space);
|
||||
REGISTER_OCL(detection_output);
|
||||
REGISTER_OCL(dft);
|
||||
REGISTER_OCL(experimental_detectron_generate_proposals_single_image);
|
||||
REGISTER_OCL(experimental_detectron_roi_feature_extractor);
|
||||
REGISTER_OCL(experimental_detectron_topk_rois);
|
||||
|
||||
33
src/plugins/intel_gpu/src/graph/include/dft_inst.h
Normal file
33
src/plugins/intel_gpu/src/graph/include/dft_inst.h
Normal file
@@ -0,0 +1,33 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <intel_gpu/primitives/dft.hpp>
|
||||
|
||||
#include "primitive_inst.h"
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
template <>
|
||||
struct typed_program_node<dft> : public typed_program_node_base<dft> {
|
||||
using typed_program_node_base::typed_program_node_base;
|
||||
|
||||
program_node& input() const {
|
||||
return get_dependency(0);
|
||||
}
|
||||
};
|
||||
|
||||
using dft_node = typed_program_node<dft>;
|
||||
|
||||
template <>
|
||||
class typed_primitive_inst<dft> : public typed_primitive_inst_base<dft> {
|
||||
public:
|
||||
using typed_primitive_inst_base::typed_primitive_inst_base;
|
||||
|
||||
static layout calc_output_layout(const dft_node& node);
|
||||
static std::string to_string(const dft_node& node);
|
||||
};
|
||||
|
||||
} // namespace cldnn
|
||||
@@ -17,6 +17,7 @@ enum class KernelType {
|
||||
AVERAGE_UNPOOLING,
|
||||
CONVOLUTION,
|
||||
DECONVOLUTION,
|
||||
DFT,
|
||||
LRN,
|
||||
NORMALIZE,
|
||||
POOLING,
|
||||
|
||||
@@ -0,0 +1,160 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "dft_kernel_ref.h"
|
||||
|
||||
#include <kernel_selector_utils.h>
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
namespace {
|
||||
|
||||
CommonDispatchData SetDefault(const dft_params& params) {
|
||||
CommonDispatchData dispatch_data;
|
||||
const auto in_layout = params.inputs.front().GetLayout();
|
||||
const auto& output = params.outputs.front();
|
||||
const auto out_layout = output.GetLayout();
|
||||
std::vector<std::vector<Tensor::DataChannelName>> dims_by_gws;
|
||||
|
||||
// We are skipping X, since it contains complex pairs and always has dimension 2
|
||||
switch (out_layout) {
|
||||
case DataLayout::bfyx:
|
||||
dispatch_data.gws = {output.Y().v, output.Feature().v, output.Batch().v};
|
||||
dims_by_gws = {{Tensor::DataChannelName::Y},
|
||||
{Tensor::DataChannelName::FEATURE},
|
||||
{Tensor::DataChannelName::BATCH}};
|
||||
break;
|
||||
case DataLayout::bfzyx:
|
||||
dispatch_data.gws = {output.Y().v, output.Z().v, output.Feature().v * output.Batch().v};
|
||||
dims_by_gws = {{Tensor::DataChannelName::Y},
|
||||
{Tensor::DataChannelName::Z},
|
||||
{Tensor::DataChannelName::FEATURE, Tensor::DataChannelName::BATCH}};
|
||||
break;
|
||||
case DataLayout::bfwzyx:
|
||||
dispatch_data.gws = {output.Y().v, output.Z().v * output.W().v, output.Feature().v * output.Batch().v};
|
||||
dims_by_gws = {{Tensor::DataChannelName::Y},
|
||||
{Tensor::DataChannelName::Z, Tensor::DataChannelName::W},
|
||||
{Tensor::DataChannelName::FEATURE, Tensor::DataChannelName::BATCH}};
|
||||
break;
|
||||
default:
|
||||
throw std::invalid_argument("Unsupported data layout for dft primitive");
|
||||
}
|
||||
|
||||
dispatch_data.lws =
|
||||
GetOptimalLocalWorkGroupSizes(dispatch_data.gws, params.engineInfo, in_layout, out_layout, dims_by_gws);
|
||||
|
||||
return dispatch_data;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void MakeJitConstForAxis(JitConstants& jit, const DataLayout& layout, int64_t index, T value) {
|
||||
std::string name = "AXIS";
|
||||
switch (index) {
|
||||
case 0:
|
||||
jit.AddConstant(MakeJitConstant(name + "_BATCH", value));
|
||||
break;
|
||||
case 1:
|
||||
jit.AddConstant(MakeJitConstant(name + "_FEATURE", value));
|
||||
break;
|
||||
case 2:
|
||||
if (layout == DataLayout::bfwzyx) {
|
||||
jit.AddConstant(MakeJitConstant(name + "_W", value));
|
||||
} else if (layout == DataLayout::bfzyx) {
|
||||
jit.AddConstant(MakeJitConstant(name + "_Z", value));
|
||||
} else { // DataLayout::bfyx
|
||||
jit.AddConstant(MakeJitConstant(name + "_Y", value));
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
if (layout == DataLayout::bfwzyx) {
|
||||
jit.AddConstant(MakeJitConstant(name + "_Z", value));
|
||||
} else { // DataLayout::bfzyx
|
||||
jit.AddConstant(MakeJitConstant(name + "_Y", value));
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
jit.AddConstant(MakeJitConstant(name + "_Y", value));
|
||||
break;
|
||||
default:
|
||||
throw std::invalid_argument("Unsupported axis for dft primitive");
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
KernelsData DFTKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
|
||||
KernelsData kernels_data;
|
||||
if (!Validate(params, options)) {
|
||||
return kernels_data;
|
||||
}
|
||||
kernels_data.push_back(KernelData::Default<dft_params>(params));
|
||||
KernelData& kernel_data = kernels_data.front();
|
||||
auto& derived_params = dynamic_cast<dft_params&>(*kernel_data.params.get());
|
||||
auto dispatch_data = SetDefault(derived_params);
|
||||
auto entry_point = GetEntryPoint(kernelName, derived_params.layerID, params, options);
|
||||
auto jit_constants = GetJitConstants(derived_params);
|
||||
auto jit = CreateJit(kernelName, jit_constants, entry_point);
|
||||
auto& clKernelData = kernel_data.kernels[0];
|
||||
FillCLKernelData(clKernelData, dispatch_data, params.engineInfo, kernelName, jit, entry_point);
|
||||
return kernels_data;
|
||||
}
|
||||
|
||||
KernelsPriority DFTKernelRef::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const {
|
||||
return DONT_USE_IF_HAVE_SOMETHING_ELSE;
|
||||
}
|
||||
|
||||
ParamsKey DFTKernelRef::GetSupportedKey() const {
|
||||
ParamsKey k;
|
||||
k.EnableInputDataType(Datatype::F16);
|
||||
k.EnableInputDataType(Datatype::F32);
|
||||
k.EnableOutputDataType(Datatype::F16);
|
||||
k.EnableOutputDataType(Datatype::F32);
|
||||
k.EnableInputLayout(DataLayout::bfyx);
|
||||
k.EnableInputLayout(DataLayout::bfzyx);
|
||||
k.EnableInputLayout(DataLayout::bfwzyx);
|
||||
k.EnableOutputLayout(DataLayout::bfyx);
|
||||
k.EnableOutputLayout(DataLayout::bfzyx);
|
||||
k.EnableOutputLayout(DataLayout::bfwzyx);
|
||||
k.EnableBatching();
|
||||
k.EnableTensorOffset();
|
||||
k.EnableTensorPitches();
|
||||
return k;
|
||||
}
|
||||
|
||||
bool DFTKernelRef::Validate(const Params& p, const optional_params& o) const {
|
||||
if (p.GetType() != KernelType::DFT || o.GetType() != KernelType::DFT) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto& params = dynamic_cast<const dft_params&>(p);
|
||||
if (params.inputs.size() != 1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
JitConstants DFTKernelRef::GetJitConstants(const dft_params& params) const {
|
||||
auto jit = MakeBaseParamsJitConstants(params);
|
||||
const auto out_layout = params.outputs.front().GetLayout();
|
||||
const auto out_sizes = params.outputs.front().LogicalDims();
|
||||
const auto in_sizes = params.inputs.front().LogicalDims();
|
||||
|
||||
// We are skipping X, since it contains complex pairs and should not be in axes
|
||||
const auto dims_size = in_sizes.size() - 1;
|
||||
|
||||
size_t s = 1;
|
||||
for (auto axis : params.axes) {
|
||||
// opencl kernels have inverted order of dimensions with respect to axis spec: x is smallest index, b is largest
|
||||
auto inverted_axis = dims_size - axis;
|
||||
s *= out_sizes[inverted_axis];
|
||||
MakeJitConstForAxis(jit, out_layout, axis, std::min(out_sizes[inverted_axis], in_sizes[inverted_axis]));
|
||||
}
|
||||
if (params.kind == dft_params::inverse) {
|
||||
jit.AddConstant(MakeJitConstant("INVERSE_DFT_MULTIPLIER", 1.f / s));
|
||||
}
|
||||
return jit;
|
||||
}
|
||||
|
||||
} // namespace kernel_selector
|
||||
@@ -0,0 +1,35 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "kernel_base_opencl.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
struct dft_params : public base_params {
|
||||
std::vector<int64_t> axes;
|
||||
enum Kind {
|
||||
forward,
|
||||
inverse,
|
||||
} kind = forward;
|
||||
dft_params() : base_params{KernelType::DFT} {}
|
||||
};
|
||||
|
||||
struct dft_optional_params : optional_params {
|
||||
dft_optional_params() : optional_params{KernelType::DFT} {}
|
||||
};
|
||||
|
||||
class DFTKernelRef : public KernelBaseOpenCL {
|
||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||
KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
bool Validate(const Params& p, const optional_params& o) const override;
|
||||
JitConstants GetJitConstants(const dft_params& params) const;
|
||||
|
||||
public:
|
||||
DFTKernelRef() : KernelBaseOpenCL{"dft_ref"} {}
|
||||
};
|
||||
|
||||
} // namespace kernel_selector
|
||||
@@ -0,0 +1,24 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "dft_kernel_selector.h"
|
||||
|
||||
#include "dft_kernel_ref.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
dft_kernel_selector::dft_kernel_selector() {
|
||||
Attach<DFTKernelRef>();
|
||||
}
|
||||
|
||||
KernelsData dft_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
|
||||
return GetNaiveBestKernel(params, options, KernelType::DFT);
|
||||
}
|
||||
|
||||
dft_kernel_selector& dft_kernel_selector::Instance() {
|
||||
static dft_kernel_selector instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
} // namespace kernel_selector
|
||||
@@ -0,0 +1,18 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <kernel_selector.h>
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
class dft_kernel_selector : public kernel_selector_base {
|
||||
public:
|
||||
dft_kernel_selector();
|
||||
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
|
||||
static dft_kernel_selector& Instance();
|
||||
};
|
||||
|
||||
} // namespace kernel_selector
|
||||
@@ -0,0 +1,139 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#define GET_INDEX(prefix, ORDER) CAT(prefix, _GET_INDEX)(ORDER)
|
||||
|
||||
// alternative: https://github.com/OpenCL/ComplexMath/blob/master/clcomplex.h
|
||||
typedef float2 cfloat;
|
||||
#define real(a) ((a).s0)
|
||||
#define imag(a) ((a).s1)
|
||||
#define cmult(a, b) ((cfloat)(real(a) * real(b) - imag(a) * imag(b), real(a) * imag(b) + imag(a) * real(b)))
|
||||
#define crmult(a, b) ((cfloat)(real(a) * (b), imag(a) * (b)))
|
||||
#define cadd(a, b) ((cfloat)(real(a) + real(b), imag(a) + imag(b)))
|
||||
#define expi(x) ((cfloat)(cos(x), sin(x)))
|
||||
#define expmi(x) ((cfloat)(cos(x), -sin(x)))
|
||||
#define cload(p, offset, pitch) ((cfloat)((p)[offset], (p)[(offset) + (pitch)]))
|
||||
#define cstore(p, offset, pitch, x) ((p)[offset] = real(x), (p)[(offset) + (pitch)] = imag(x))
|
||||
#define czero() ((cfloat)(0))
|
||||
|
||||
// TODO: pregenerate e{r,i} array on host in macro. maybe it could be done with kernel which runs once?
|
||||
KERNEL(dft_ref)(const __global INPUT0_TYPE* data, __global OUTPUT_TYPE* output) {
|
||||
const uint dim0 = get_global_id(0);
|
||||
const uint dim1 = get_global_id(1);
|
||||
const uint dim2 = get_global_id(2);
|
||||
|
||||
const uint x = 0;
|
||||
const uint y = dim0;
|
||||
#if OUTPUT_DIMS == 4
|
||||
# define ORDER b, f, y, x
|
||||
# define ORDER_K kb, kf, ky, x
|
||||
const uint f = dim1;
|
||||
const uint b = dim2;
|
||||
#elif OUTPUT_DIMS == 5
|
||||
# define ORDER b, f, z, y, x
|
||||
# define ORDER_K kb, kf, kz, ky, x
|
||||
const uint z = dim1;
|
||||
const uint f = dim2 % OUTPUT_FEATURE_NUM;
|
||||
const uint b = dim2 / OUTPUT_FEATURE_NUM;
|
||||
#elif OUTPUT_DIMS == 6
|
||||
# define ORDER b, f, w, z, y, x
|
||||
# define ORDER_K kb, kf, kw, kz, ky, x
|
||||
const uint z = dim1 % OUTPUT_SIZE_Z;
|
||||
const uint w = dim1 / OUTPUT_SIZE_Z;
|
||||
const uint f = dim2 % OUTPUT_FEATURE_NUM;
|
||||
const uint b = dim2 / OUTPUT_FEATURE_NUM;
|
||||
#endif
|
||||
|
||||
// TODO: use OUTPUT_TYPE for intermediate calculations?
|
||||
// We don't use it for now as we will lose a lot of precision for f16 and tests won't pass
|
||||
cfloat Y = czero();
|
||||
const float PI2 = M_PI_F * 2;
|
||||
|
||||
#ifdef AXIS_Y
|
||||
const float ay = PI2 * y / OUTPUT_SIZE_Y;
|
||||
#endif
|
||||
#ifdef AXIS_Z
|
||||
const float az = PI2 * z / OUTPUT_SIZE_Z;
|
||||
#endif
|
||||
#ifdef AXIS_W
|
||||
const float aw = PI2 * w / OUTPUT_SIZE_W;
|
||||
#endif
|
||||
#ifdef AXIS_FEATURE
|
||||
const float af = PI2 * f / OUTPUT_FEATURE_NUM;
|
||||
#endif
|
||||
#ifdef AXIS_BATCH
|
||||
const float ab = PI2 * b / OUTPUT_BATCH_NUM;
|
||||
#endif
|
||||
|
||||
#ifdef AXIS_BATCH
|
||||
for (uint kb = 0; kb < AXIS_BATCH; ++kb)
|
||||
#else
|
||||
# define kb b
|
||||
#endif
|
||||
#ifdef AXIS_FEATURE
|
||||
for (uint kf = 0; kf < AXIS_FEATURE; ++kf)
|
||||
#else
|
||||
# define kf f
|
||||
#endif
|
||||
#ifdef AXIS_W
|
||||
for (uint kw = 0; kw < AXIS_W; ++kw)
|
||||
#else
|
||||
# define kw w
|
||||
#endif
|
||||
#ifdef AXIS_Z
|
||||
for (uint kz = 0; kz < AXIS_Z; ++kz)
|
||||
#else
|
||||
# define kz z
|
||||
#endif
|
||||
#ifdef AXIS_Y
|
||||
for (uint ky = 0; ky < AXIS_Y; ++ky)
|
||||
#else
|
||||
# define ky y
|
||||
#endif
|
||||
{
|
||||
float a = 0;
|
||||
#ifdef AXIS_Y
|
||||
a += ay * ky;
|
||||
#endif
|
||||
#ifdef AXIS_Z
|
||||
a += az * kz;
|
||||
#endif
|
||||
#ifdef AXIS_W
|
||||
a += aw * kw;
|
||||
#endif
|
||||
#ifdef AXIS_FEATURE
|
||||
a += af * kf;
|
||||
#endif
|
||||
#ifdef AXIS_BATCH
|
||||
a += ab * kb;
|
||||
#endif
|
||||
const cfloat X = cload(data, GET_INDEX(INPUT0, ORDER_K), INPUT0_X_PITCH);
|
||||
#ifdef INVERSE_DFT_MULTIPLIER
|
||||
const cfloat E = expi(a);
|
||||
#else
|
||||
const cfloat E = expmi(a);
|
||||
#endif
|
||||
Y = cadd(Y, cmult(X, E));
|
||||
}
|
||||
|
||||
#ifdef INVERSE_DFT_MULTIPLIER
|
||||
Y = crmult(Y, INVERSE_DFT_MULTIPLIER);
|
||||
#endif
|
||||
|
||||
cstore(output, GET_INDEX(OUTPUT, ORDER), OUTPUT_X_PITCH, Y);
|
||||
}
|
||||
|
||||
#undef real
|
||||
#undef imag
|
||||
#undef cmult
|
||||
#undef crmult
|
||||
#undef cadd
|
||||
#undef expi
|
||||
#undef expmi
|
||||
#undef cload
|
||||
#undef cstore
|
||||
#undef czero
|
||||
#undef GET_INDEX
|
||||
#undef ORDER
|
||||
#undef ORDER_K
|
||||
54
src/plugins/intel_gpu/src/plugin/ops/dft.cpp
Normal file
54
src/plugins/intel_gpu/src/plugin/ops/dft.cpp
Normal file
@@ -0,0 +1,54 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <intel_gpu/plugin/common_utils.hpp>
|
||||
#include <intel_gpu/plugin/program.hpp>
|
||||
#include <intel_gpu/primitives/dft.hpp>
|
||||
#include <ngraph/op/constant.hpp>
|
||||
#include <ngraph/op/dft.hpp>
|
||||
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
namespace {
|
||||
|
||||
void createDft(Program& p, const std::shared_ptr<ngraph::Node>& op, cldnn::dft_kind kind) {
|
||||
p.ValidateInputs(op, {2, 3});
|
||||
|
||||
const auto inputs = p.GetInputPrimitiveIDs(op);
|
||||
const auto layer_name = layer_type_name_ID(op);
|
||||
const auto& op_friendly_name = op->get_friendly_name();
|
||||
const auto& out_shape = op->get_output_shape(0);
|
||||
|
||||
auto axes_constant = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(1));
|
||||
if (!axes_constant) {
|
||||
IE_THROW() << "Unsupported parameter nodes type in " << op_friendly_name << " (" << op->get_type_name() << ")";
|
||||
}
|
||||
auto axes = axes_constant->cast_vector<int64_t>();
|
||||
const uint8_t data_rank = out_shape.size();
|
||||
ov::normalize_axes(op.get(), data_rank - 1, axes);
|
||||
|
||||
const cldnn::dft prim(layer_name, inputs.front(), std::move(axes), out_shape, kind, op_friendly_name);
|
||||
|
||||
p.AddPrimitive(prim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
}
|
||||
|
||||
void CreateDFTOp(Program& p, const std::shared_ptr<ngraph::op::v7::DFT>& op) {
|
||||
createDft(p, op, cldnn::dft_kind::forward);
|
||||
}
|
||||
|
||||
void CreateIDFTOp(Program& p, const std::shared_ptr<ngraph::op::v7::IDFT>& op) {
|
||||
createDft(p, op, cldnn::dft_kind::inverse);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
REGISTER_FACTORY_IMPL(v7, DFT);
|
||||
REGISTER_FACTORY_IMPL(v7, IDFT);
|
||||
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
||||
1591
src/plugins/intel_gpu/tests/test_cases/dft_gpu_test.cpp
Normal file
1591
src/plugins/intel_gpu/tests/test_cases/dft_gpu_test.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,65 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <common_test_utils/test_constants.hpp>
|
||||
#include <single_layer_tests/dft.hpp>
|
||||
#include <vector>
|
||||
|
||||
namespace {
|
||||
|
||||
const std::vector<ngraph::helpers::DFTOpType> opTypes = {ngraph::helpers::DFTOpType::FORWARD,
|
||||
ngraph::helpers::DFTOpType::INVERSE};
|
||||
const std::vector<InferenceEngine::Precision> inputPrecision = {InferenceEngine::Precision::FP32,
|
||||
InferenceEngine::Precision::FP16};
|
||||
const auto combine = [](const std::vector<InferenceEngine::SizeVector>& inputShapes,
|
||||
const std::vector<std::vector<int64_t>>& axes,
|
||||
const std::vector<std::vector<int64_t>>& signalSizes) {
|
||||
return testing::Combine(testing::ValuesIn(inputShapes),
|
||||
testing::ValuesIn(inputPrecision),
|
||||
testing::ValuesIn(axes),
|
||||
testing::ValuesIn(signalSizes),
|
||||
testing::ValuesIn(opTypes),
|
||||
testing::Values(CommonTestUtils::DEVICE_GPU));
|
||||
};
|
||||
|
||||
using namespace LayerTestsDefinitions;
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_DFT_2d,
|
||||
DFTLayerTest,
|
||||
combine({{10, 2}, {1, 2}}, // input shapes
|
||||
{{0}, {-1}}, // axes
|
||||
{{}, {5}}), // signal sizes
|
||||
DFTLayerTest::getTestCaseName);
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_DFT_3d,
|
||||
DFTLayerTest,
|
||||
combine({{10, 4, 2}, {1, 17, 2}}, // input shapes
|
||||
{{0, 1}, {-1, -2}}, // axes
|
||||
{{}, {5, 2}}), // signal sizes
|
||||
DFTLayerTest::getTestCaseName);
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_DFT_4d,
|
||||
DFTLayerTest,
|
||||
combine({{10, 4, 8, 2}, {1, 17, 12, 2}}, // input shapes
|
||||
{{0, 1, 2}, {-1, -2, -3}}, // axes
|
||||
{{}, {5, 2, 5}}), // signal sizes
|
||||
DFTLayerTest::getTestCaseName);
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_DFT_5d,
|
||||
DFTLayerTest,
|
||||
combine({{10, 4, 8, 2, 2}, {1, 17, 12, 1, 2}}, // input shapes
|
||||
{{0, 1, 2, 3}, {-1, -2, -3, -4}}, // axes
|
||||
{{}, {5, 2, 5, 20}}), // signal sizes
|
||||
DFTLayerTest::getTestCaseName);
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_DFT_6d,
|
||||
DFTLayerTest,
|
||||
combine({{10, 4, 8, 2, 5, 2}, {1, 17, 12, 1, 7, 2}}, // input shapes
|
||||
{{0, 1, 2, 3, 4}, {-1, -2, -3, -4, -5}}, // axes
|
||||
{{}, {5, 2, 5, 20, 10}}), // signal sizes
|
||||
DFTLayerTest::getTestCaseName);
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_DFT_6d_zero,
|
||||
DFTLayerTest,
|
||||
combine({{10, 4, 8, 2, 5, 2}, {1, 17, 12, 1, 7, 2}}, // input shapes
|
||||
{{}}, // axes
|
||||
{{}}), // signal sizes
|
||||
DFTLayerTest::getTestCaseName);
|
||||
|
||||
} // namespace
|
||||
Reference in New Issue
Block a user