[GPU] (I)Dft with single layer test (#9891)

* dft with single layer test * idft with single layer test * fix output param usage in dft * update dft according to the clang-format * move output layout setup to calc_output_layout * add support for other dimensions * add clDNN unit test for DFT/IDFT * remove unnecessary original rank * use defined formats in kernel * fix dft docs * changes after review * Revert "fix dft docs" This reverts commit 45b05172dfd161d92dae6d26e0f1b74748e56fd5. Co-authored-by: Serhii Pavlovskyi <spavlovskyi@lohika.com> Co-authored-by: Mykhailo Hnap <mhnap@lohika.com>
2022-05-25 10:24:46 +03:00
parent e767e9e243
commit 4b08ce4787
16 changed files with 2279 additions and 0 deletions
--- a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp
@@ -211,8 +211,10 @@ REGISTER_FACTORY(v6, ExperimentalDetectronTopKROIs)
 REGISTER_FACTORY(v6, ExperimentalDetectronGenerateProposalsSingleImage);

 // ------------------------------ Supported v7 ops ------------------------------ //
+REGISTER_FACTORY(v7, DFT);
 REGISTER_FACTORY(v7, Gather);
 REGISTER_FACTORY(v7, Gelu);
+REGISTER_FACTORY(v7, IDFT);

 // ------------------------------ Supported v8 ops ------------------------------ //
 REGISTER_FACTORY(v8, Slice);
--- a/src/plugins/intel_gpu/include/intel_gpu/primitives/dft.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/dft.hpp
@@ -0,0 +1,52 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <openvino/core/shape.hpp>
+
+#include "primitive.hpp"
+
+namespace cldnn {
+/// @addtogroup cpp_api C++ API
+/// @{
+/// @addtogroup cpp_topology Network Topology
+/// @{
+/// @addtogroup cpp_primitives Primitives
+/// @{
+
+/// @brief Kind of DFT operation.
+enum class dft_kind {
+    forward,
+    inverse,
+};
+
+/// @brief DFT primitive.
+struct dft : public primitive_base<dft> {
+    CLDNN_DECLARE_PRIMITIVE(dft)
+
+    /// @brief Constructs DFT primitive.
+    /// @param id This primitive id.
+    /// @param input Input primitive id.
+    /// @param axes Axes to perform DFT.
+    /// @param output_shape Output shape.
+    /// @param kind Kind of DFT operation.
+    dft(const primitive_id& id,
+        const primitive_id& input,
+        std::vector<int64_t>&& axes,
+        const ov::Shape& output_shape,
+        dft_kind kind,
+        const primitive_id& ext_prim_id = {},
+        const padding& output_padding = {})
+        : primitive_base(id, {input}, ext_prim_id, output_padding),
+          axes(std::move(axes)),
+          output_shape(output_shape),
+          kind(kind) {}
+
+    std::vector<int64_t> axes;
+    ov::Shape output_shape;
+    dft_kind kind;
+};
+
+}  // namespace cldnn
--- a/src/plugins/intel_gpu/src/graph/dft.cpp
+++ b/src/plugins/intel_gpu/src/graph/dft.cpp
@@ -0,0 +1,40 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <dft_inst.h>
+#include <primitive_type_base.h>
+
+#include "json_object.h"
+
+namespace cldnn {
+
+primitive_type_id dft::type_id() {
+    static primitive_type_base<dft> instance;
+    return &instance;
+}
+
+layout typed_primitive_inst<dft>::calc_output_layout(const dft_node& node) {
+    auto primitive = node.get_primitive();
+    auto input_layout = node.input().get_output_layout();
+
+    std::vector<tensor::value_type> dims_converted(primitive->output_shape.begin(), primitive->output_shape.end());
+    auto output_format = input_layout.format;
+
+    // Extend shape to 4d by pushing ones before the last dim
+    for (auto i = dims_converted.size(); i < 4; ++i) {
+        dims_converted.insert(std::prev(dims_converted.end()), 1);
+    }
+
+    return {input_layout.data_type, output_format, tensor(output_format, dims_converted)};
+}
+
+std::string typed_primitive_inst<dft>::to_string(const dft_node& node) {
+    auto desc = node.get_primitive();
+    auto node_info = node.desc_to_json();
+    std::ostringstream os;
+    node_info->dump(os);
+    return os.str();
+}
+
+}  // namespace cldnn
--- a/src/plugins/intel_gpu/src/graph/impls/ocl/dft.cpp
+++ b/src/plugins/intel_gpu/src/graph/impls/ocl/dft.cpp
@@ -0,0 +1,63 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <dft/dft_kernel_ref.h>
+#include <dft/dft_kernel_selector.h>
+#include <dft_inst.h>
+#include <kernel_selector_helper.h>
+
+#include <impls/implementation_map.hpp>
+#include <intel_gpu/runtime/error_handler.hpp>
+
+#include "primitive_base.hpp"
+
+namespace cldnn {
+namespace ocl {
+
+struct dft_impl : typed_primitive_impl_ocl<dft> {
+    using typed_primitive_impl_ocl::typed_primitive_impl_ocl;
+
+    std::unique_ptr<primitive_impl> clone() const override {
+        return make_unique<dft_impl>(*this);
+    }
+
+    static primitive_impl* create(const dft_node& arg) {
+        auto params = get_default_params<kernel_selector::dft_params>(arg);
+        auto primitive = arg.get_primitive();
+        params.axes = primitive->axes;
+        if (primitive->kind == dft_kind::inverse) {
+            params.kind = kernel_selector::dft_params::inverse;
+        }
+        auto optional_params = get_default_optional_params<kernel_selector::dft_optional_params>(arg.get_program());
+
+        auto& kernel_selector = kernel_selector::dft_kernel_selector::Instance();
+        auto best_kernels = kernel_selector.GetBestKernels(params, optional_params);
+
+        CLDNN_ERROR_BOOL(arg.id(),
+                         "Best_kernel.empty()",
+                         best_kernels.empty(),
+                         "Cannot find a proper kernel with this arguments");
+
+        return new dft_impl{arg, best_kernels.front()};
+    }
+};
+
+namespace detail {
+
+attach_dft_impl::attach_dft_impl() {
+    implementation_map<dft>::add(impl_types::ocl,
+                                 dft_impl::create,
+                                 {
+                                     std::make_tuple(data_types::f16, format::bfyx),
+                                     std::make_tuple(data_types::f16, format::bfzyx),
+                                     std::make_tuple(data_types::f16, format::bfwzyx),
+                                     std::make_tuple(data_types::f32, format::bfyx),
+                                     std::make_tuple(data_types::f32, format::bfzyx),
+                                     std::make_tuple(data_types::f32, format::bfwzyx),
+                                 });
+}
+
+}  // namespace detail
+}  // namespace ocl
+}  // namespace cldnn
--- a/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp
+++ b/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp
@@ -28,6 +28,7 @@ void register_implementations() {
    REGISTER_OCL(deformable_interp);
    REGISTER_OCL(depth_to_space);
    REGISTER_OCL(detection_output);
+    REGISTER_OCL(dft);
    REGISTER_OCL(batch_to_space);
    REGISTER_OCL(experimental_detectron_generate_proposals_single_image);
    REGISTER_OCL(experimental_detectron_roi_feature_extractor);
--- a/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp
+++ b/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp
@@ -101,6 +101,7 @@ REGISTER_OCL(deformable_conv);
 REGISTER_OCL(deformable_interp);
 REGISTER_OCL(depth_to_space);
 REGISTER_OCL(detection_output);
+REGISTER_OCL(dft);
 REGISTER_OCL(experimental_detectron_generate_proposals_single_image);
 REGISTER_OCL(experimental_detectron_roi_feature_extractor);
 REGISTER_OCL(experimental_detectron_topk_rois);
--- a/src/plugins/intel_gpu/src/graph/include/dft_inst.h
+++ b/src/plugins/intel_gpu/src/graph/include/dft_inst.h
@@ -0,0 +1,33 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <intel_gpu/primitives/dft.hpp>
+
+#include "primitive_inst.h"
+
+namespace cldnn {
+
+template <>
+struct typed_program_node<dft> : public typed_program_node_base<dft> {
+    using typed_program_node_base::typed_program_node_base;
+
+    program_node& input() const {
+        return get_dependency(0);
+    }
+};
+
+using dft_node = typed_program_node<dft>;
+
+template <>
+class typed_primitive_inst<dft> : public typed_primitive_inst_base<dft> {
+public:
+    using typed_primitive_inst_base::typed_primitive_inst_base;
+
+    static layout calc_output_layout(const dft_node& node);
+    static std::string to_string(const dft_node& node);
+};
+
+}  // namespace cldnn
--- a/src/plugins/intel_gpu/src/kernel_selector/common/common_types.h
+++ b/src/plugins/intel_gpu/src/kernel_selector/common/common_types.h
@@ -17,6 +17,7 @@ enum class KernelType {
    AVERAGE_UNPOOLING,
    CONVOLUTION,
    DECONVOLUTION,
+    DFT,
    LRN,
    NORMALIZE,
    POOLING,
--- a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/dft/dft_kernel_ref.cpp
+++ b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/dft/dft_kernel_ref.cpp
@@ -0,0 +1,160 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "dft_kernel_ref.h"
+
+#include <kernel_selector_utils.h>
+
+namespace kernel_selector {
+
+namespace {
+
+CommonDispatchData SetDefault(const dft_params& params) {
+    CommonDispatchData dispatch_data;
+    const auto in_layout = params.inputs.front().GetLayout();
+    const auto& output = params.outputs.front();
+    const auto out_layout = output.GetLayout();
+    std::vector<std::vector<Tensor::DataChannelName>> dims_by_gws;
+
+    // We are skipping X, since it contains complex pairs and always has dimension 2
+    switch (out_layout) {
+    case DataLayout::bfyx:
+        dispatch_data.gws = {output.Y().v, output.Feature().v, output.Batch().v};
+        dims_by_gws = {{Tensor::DataChannelName::Y},
+                       {Tensor::DataChannelName::FEATURE},
+                       {Tensor::DataChannelName::BATCH}};
+        break;
+    case DataLayout::bfzyx:
+        dispatch_data.gws = {output.Y().v, output.Z().v, output.Feature().v * output.Batch().v};
+        dims_by_gws = {{Tensor::DataChannelName::Y},
+                       {Tensor::DataChannelName::Z},
+                       {Tensor::DataChannelName::FEATURE, Tensor::DataChannelName::BATCH}};
+        break;
+    case DataLayout::bfwzyx:
+        dispatch_data.gws = {output.Y().v, output.Z().v * output.W().v, output.Feature().v * output.Batch().v};
+        dims_by_gws = {{Tensor::DataChannelName::Y},
+                       {Tensor::DataChannelName::Z, Tensor::DataChannelName::W},
+                       {Tensor::DataChannelName::FEATURE, Tensor::DataChannelName::BATCH}};
+        break;
+    default:
+        throw std::invalid_argument("Unsupported data layout for dft primitive");
+    }
+
+    dispatch_data.lws =
+        GetOptimalLocalWorkGroupSizes(dispatch_data.gws, params.engineInfo, in_layout, out_layout, dims_by_gws);
+
+    return dispatch_data;
+}
+
+template <class T>
+void MakeJitConstForAxis(JitConstants& jit, const DataLayout& layout, int64_t index, T value) {
+    std::string name = "AXIS";
+    switch (index) {
+    case 0:
+        jit.AddConstant(MakeJitConstant(name + "_BATCH", value));
+        break;
+    case 1:
+        jit.AddConstant(MakeJitConstant(name + "_FEATURE", value));
+        break;
+    case 2:
+        if (layout == DataLayout::bfwzyx) {
+            jit.AddConstant(MakeJitConstant(name + "_W", value));
+        } else if (layout == DataLayout::bfzyx) {
+            jit.AddConstant(MakeJitConstant(name + "_Z", value));
+        } else {  // DataLayout::bfyx
+            jit.AddConstant(MakeJitConstant(name + "_Y", value));
+        }
+        break;
+    case 3:
+        if (layout == DataLayout::bfwzyx) {
+            jit.AddConstant(MakeJitConstant(name + "_Z", value));
+        } else {  // DataLayout::bfzyx
+            jit.AddConstant(MakeJitConstant(name + "_Y", value));
+        }
+        break;
+    case 4:
+        jit.AddConstant(MakeJitConstant(name + "_Y", value));
+        break;
+    default:
+        throw std::invalid_argument("Unsupported axis for dft primitive");
+    }
+}
+
+}  // namespace
+
+KernelsData DFTKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
+    KernelsData kernels_data;
+    if (!Validate(params, options)) {
+        return kernels_data;
+    }
+    kernels_data.push_back(KernelData::Default<dft_params>(params));
+    KernelData& kernel_data = kernels_data.front();
+    auto& derived_params = dynamic_cast<dft_params&>(*kernel_data.params.get());
+    auto dispatch_data = SetDefault(derived_params);
+    auto entry_point = GetEntryPoint(kernelName, derived_params.layerID, params, options);
+    auto jit_constants = GetJitConstants(derived_params);
+    auto jit = CreateJit(kernelName, jit_constants, entry_point);
+    auto& clKernelData = kernel_data.kernels[0];
+    FillCLKernelData(clKernelData, dispatch_data, params.engineInfo, kernelName, jit, entry_point);
+    return kernels_data;
+}
+
+KernelsPriority DFTKernelRef::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const {
+    return DONT_USE_IF_HAVE_SOMETHING_ELSE;
+}
+
+ParamsKey DFTKernelRef::GetSupportedKey() const {
+    ParamsKey k;
+    k.EnableInputDataType(Datatype::F16);
+    k.EnableInputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F16);
+    k.EnableOutputDataType(Datatype::F32);
+    k.EnableInputLayout(DataLayout::bfyx);
+    k.EnableInputLayout(DataLayout::bfzyx);
+    k.EnableInputLayout(DataLayout::bfwzyx);
+    k.EnableOutputLayout(DataLayout::bfyx);
+    k.EnableOutputLayout(DataLayout::bfzyx);
+    k.EnableOutputLayout(DataLayout::bfwzyx);
+    k.EnableBatching();
+    k.EnableTensorOffset();
+    k.EnableTensorPitches();
+    return k;
+}
+
+bool DFTKernelRef::Validate(const Params& p, const optional_params& o) const {
+    if (p.GetType() != KernelType::DFT || o.GetType() != KernelType::DFT) {
+        return false;
+    }
+
+    auto& params = dynamic_cast<const dft_params&>(p);
+    if (params.inputs.size() != 1) {
+        return false;
+    }
+
+    return true;
+}
+
+JitConstants DFTKernelRef::GetJitConstants(const dft_params& params) const {
+    auto jit = MakeBaseParamsJitConstants(params);
+    const auto out_layout = params.outputs.front().GetLayout();
+    const auto out_sizes = params.outputs.front().LogicalDims();
+    const auto in_sizes = params.inputs.front().LogicalDims();
+
+    // We are skipping X, since it contains complex pairs and should not be in axes
+    const auto dims_size = in_sizes.size() - 1;
+
+    size_t s = 1;
+    for (auto axis : params.axes) {
+        // opencl kernels have inverted order of dimensions with respect to axis spec: x is smallest index, b is largest
+        auto inverted_axis = dims_size - axis;
+        s *= out_sizes[inverted_axis];
+        MakeJitConstForAxis(jit, out_layout, axis, std::min(out_sizes[inverted_axis], in_sizes[inverted_axis]));
+    }
+    if (params.kind == dft_params::inverse) {
+        jit.AddConstant(MakeJitConstant("INVERSE_DFT_MULTIPLIER", 1.f / s));
+    }
+    return jit;
+}
+
+}  // namespace kernel_selector
--- a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/dft/dft_kernel_ref.h
+++ b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/dft/dft_kernel_ref.h
@@ -0,0 +1,35 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "kernel_base_opencl.h"
+
+namespace kernel_selector {
+
+struct dft_params : public base_params {
+    std::vector<int64_t> axes;
+    enum Kind {
+        forward,
+        inverse,
+    } kind = forward;
+    dft_params() : base_params{KernelType::DFT} {}
+};
+
+struct dft_optional_params : optional_params {
+    dft_optional_params() : optional_params{KernelType::DFT} {}
+};
+
+class DFTKernelRef : public KernelBaseOpenCL {
+    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+    KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
+    ParamsKey GetSupportedKey() const override;
+    bool Validate(const Params& p, const optional_params& o) const override;
+    JitConstants GetJitConstants(const dft_params& params) const;
+
+public:
+    DFTKernelRef() : KernelBaseOpenCL{"dft_ref"} {}
+};
+
+}  // namespace kernel_selector
--- a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/dft/dft_kernel_selector.cpp
+++ b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/dft/dft_kernel_selector.cpp
@@ -0,0 +1,24 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "dft_kernel_selector.h"
+
+#include "dft_kernel_ref.h"
+
+namespace kernel_selector {
+
+dft_kernel_selector::dft_kernel_selector() {
+    Attach<DFTKernelRef>();
+}
+
+KernelsData dft_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
+    return GetNaiveBestKernel(params, options, KernelType::DFT);
+}
+
+dft_kernel_selector& dft_kernel_selector::Instance() {
+    static dft_kernel_selector instance;
+    return instance;
+}
+
+}  // namespace kernel_selector
--- a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/dft/dft_kernel_selector.h
+++ b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/dft/dft_kernel_selector.h
@@ -0,0 +1,18 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <kernel_selector.h>
+
+namespace kernel_selector {
+
+class dft_kernel_selector : public kernel_selector_base {
+public:
+    dft_kernel_selector();
+    KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+    static dft_kernel_selector& Instance();
+};
+
+}  // namespace kernel_selector
--- a/src/plugins/intel_gpu/src/kernel_selector/core/cl_kernels/dft_ref.cl
+++ b/src/plugins/intel_gpu/src/kernel_selector/core/cl_kernels/dft_ref.cl
@@ -0,0 +1,139 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#define GET_INDEX(prefix, ORDER) CAT(prefix, _GET_INDEX)(ORDER)
+
+// alternative: https://github.com/OpenCL/ComplexMath/blob/master/clcomplex.h
+typedef float2 cfloat;
+#define real(a)                     ((a).s0)
+#define imag(a)                     ((a).s1)
+#define cmult(a, b)                 ((cfloat)(real(a) * real(b) - imag(a) * imag(b), real(a) * imag(b) + imag(a) * real(b)))
+#define crmult(a, b)                ((cfloat)(real(a) * (b), imag(a) * (b)))
+#define cadd(a, b)                  ((cfloat)(real(a) + real(b), imag(a) + imag(b)))
+#define expi(x)                     ((cfloat)(cos(x), sin(x)))
+#define expmi(x)                    ((cfloat)(cos(x), -sin(x)))
+#define cload(p, offset, pitch)     ((cfloat)((p)[offset], (p)[(offset) + (pitch)]))
+#define cstore(p, offset, pitch, x) ((p)[offset] = real(x), (p)[(offset) + (pitch)] = imag(x))
+#define czero()                     ((cfloat)(0))
+
+// TODO: pregenerate e{r,i} array on host in macro. maybe it could be done with kernel which runs once?
+KERNEL(dft_ref)(const __global INPUT0_TYPE* data, __global OUTPUT_TYPE* output) {
+    const uint dim0 = get_global_id(0);
+    const uint dim1 = get_global_id(1);
+    const uint dim2 = get_global_id(2);
+
+    const uint x = 0;
+    const uint y = dim0;
+#if OUTPUT_DIMS == 4
+#    define ORDER   b, f, y, x
+#    define ORDER_K kb, kf, ky, x
+    const uint f = dim1;
+    const uint b = dim2;
+#elif OUTPUT_DIMS == 5
+#    define ORDER   b, f, z, y, x
+#    define ORDER_K kb, kf, kz, ky, x
+    const uint z = dim1;
+    const uint f = dim2 % OUTPUT_FEATURE_NUM;
+    const uint b = dim2 / OUTPUT_FEATURE_NUM;
+#elif OUTPUT_DIMS == 6
+#    define ORDER   b, f, w, z, y, x
+#    define ORDER_K kb, kf, kw, kz, ky, x
+    const uint z = dim1 % OUTPUT_SIZE_Z;
+    const uint w = dim1 / OUTPUT_SIZE_Z;
+    const uint f = dim2 % OUTPUT_FEATURE_NUM;
+    const uint b = dim2 / OUTPUT_FEATURE_NUM;
+#endif
+
+    // TODO: use OUTPUT_TYPE for intermediate calculations?
+    // We don't use it for now as we will lose a lot of precision for f16 and tests won't pass
+    cfloat Y = czero();
+    const float PI2 = M_PI_F * 2;
+
+#ifdef AXIS_Y
+    const float ay = PI2 * y / OUTPUT_SIZE_Y;
+#endif
+#ifdef AXIS_Z
+    const float az = PI2 * z / OUTPUT_SIZE_Z;
+#endif
+#ifdef AXIS_W
+    const float aw = PI2 * w / OUTPUT_SIZE_W;
+#endif
+#ifdef AXIS_FEATURE
+    const float af = PI2 * f / OUTPUT_FEATURE_NUM;
+#endif
+#ifdef AXIS_BATCH
+    const float ab = PI2 * b / OUTPUT_BATCH_NUM;
+#endif
+
+#ifdef AXIS_BATCH
+    for (uint kb = 0; kb < AXIS_BATCH; ++kb)
+#else
+#    define kb b
+#endif
+#ifdef AXIS_FEATURE
+        for (uint kf = 0; kf < AXIS_FEATURE; ++kf)
+#else
+#    define kf f
+#endif
+#ifdef AXIS_W
+            for (uint kw = 0; kw < AXIS_W; ++kw)
+#else
+#    define kw w
+#endif
+#ifdef AXIS_Z
+                for (uint kz = 0; kz < AXIS_Z; ++kz)
+#else
+#    define kz z
+#endif
+#ifdef AXIS_Y
+                    for (uint ky = 0; ky < AXIS_Y; ++ky)
+#else
+#    define ky y
+#endif
+                    {
+                        float a = 0;
+#ifdef AXIS_Y
+                        a += ay * ky;
+#endif
+#ifdef AXIS_Z
+                        a += az * kz;
+#endif
+#ifdef AXIS_W
+                        a += aw * kw;
+#endif
+#ifdef AXIS_FEATURE
+                        a += af * kf;
+#endif
+#ifdef AXIS_BATCH
+                        a += ab * kb;
+#endif
+                        const cfloat X = cload(data, GET_INDEX(INPUT0, ORDER_K), INPUT0_X_PITCH);
+#ifdef INVERSE_DFT_MULTIPLIER
+                        const cfloat E = expi(a);
+#else
+        const cfloat E = expmi(a);
+#endif
+                        Y = cadd(Y, cmult(X, E));
+                    }
+
+#ifdef INVERSE_DFT_MULTIPLIER
+    Y = crmult(Y, INVERSE_DFT_MULTIPLIER);
+#endif
+
+    cstore(output, GET_INDEX(OUTPUT, ORDER), OUTPUT_X_PITCH, Y);
+}
+
+#undef real
+#undef imag
+#undef cmult
+#undef crmult
+#undef cadd
+#undef expi
+#undef expmi
+#undef cload
+#undef cstore
+#undef czero
+#undef GET_INDEX
+#undef ORDER
+#undef ORDER_K
--- a/src/plugins/intel_gpu/src/plugin/ops/dft.cpp
+++ b/src/plugins/intel_gpu/src/plugin/ops/dft.cpp
@@ -0,0 +1,54 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <intel_gpu/plugin/common_utils.hpp>
+#include <intel_gpu/plugin/program.hpp>
+#include <intel_gpu/primitives/dft.hpp>
+#include <ngraph/op/constant.hpp>
+#include <ngraph/op/dft.hpp>
+
+namespace ov {
+namespace runtime {
+namespace intel_gpu {
+
+namespace {
+
+void createDft(Program& p, const std::shared_ptr<ngraph::Node>& op, cldnn::dft_kind kind) {
+    p.ValidateInputs(op, {2, 3});
+
+    const auto inputs = p.GetInputPrimitiveIDs(op);
+    const auto layer_name = layer_type_name_ID(op);
+    const auto& op_friendly_name = op->get_friendly_name();
+    const auto& out_shape = op->get_output_shape(0);
+
+    auto axes_constant = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(1));
+    if (!axes_constant) {
+        IE_THROW() << "Unsupported parameter nodes type in " << op_friendly_name << " (" << op->get_type_name() << ")";
+    }
+    auto axes = axes_constant->cast_vector<int64_t>();
+    const uint8_t data_rank = out_shape.size();
+    ov::normalize_axes(op.get(), data_rank - 1, axes);
+
+    const cldnn::dft prim(layer_name, inputs.front(), std::move(axes), out_shape, kind, op_friendly_name);
+
+    p.AddPrimitive(prim);
+    p.AddPrimitiveToProfiler(op);
+}
+
+void CreateDFTOp(Program& p, const std::shared_ptr<ngraph::op::v7::DFT>& op) {
+    createDft(p, op, cldnn::dft_kind::forward);
+}
+
+void CreateIDFTOp(Program& p, const std::shared_ptr<ngraph::op::v7::IDFT>& op) {
+    createDft(p, op, cldnn::dft_kind::inverse);
+}
+
+}  // namespace
+
+REGISTER_FACTORY_IMPL(v7, DFT);
+REGISTER_FACTORY_IMPL(v7, IDFT);
+
+}  // namespace intel_gpu
+}  // namespace runtime
+}  // namespace ov
--- a/src/plugins/intel_gpu/tests/test_cases/dft_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/dft_gpu_test.cpp
--- a/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/dft.cpp
+++ b/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/dft.cpp
@@ -0,0 +1,65 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <common_test_utils/test_constants.hpp>
+#include <single_layer_tests/dft.hpp>
+#include <vector>
+
+namespace {
+
+const std::vector<ngraph::helpers::DFTOpType> opTypes = {ngraph::helpers::DFTOpType::FORWARD,
+                                                         ngraph::helpers::DFTOpType::INVERSE};
+const std::vector<InferenceEngine::Precision> inputPrecision = {InferenceEngine::Precision::FP32,
+                                                                InferenceEngine::Precision::FP16};
+const auto combine = [](const std::vector<InferenceEngine::SizeVector>& inputShapes,
+                        const std::vector<std::vector<int64_t>>& axes,
+                        const std::vector<std::vector<int64_t>>& signalSizes) {
+    return testing::Combine(testing::ValuesIn(inputShapes),
+                            testing::ValuesIn(inputPrecision),
+                            testing::ValuesIn(axes),
+                            testing::ValuesIn(signalSizes),
+                            testing::ValuesIn(opTypes),
+                            testing::Values(CommonTestUtils::DEVICE_GPU));
+};
+
+using namespace LayerTestsDefinitions;
+
+INSTANTIATE_TEST_SUITE_P(smoke_DFT_2d,
+                         DFTLayerTest,
+                         combine({{10, 2}, {1, 2}},  // input shapes
+                                 {{0}, {-1}},        // axes
+                                 {{}, {5}}),         // signal sizes
+                         DFTLayerTest::getTestCaseName);
+INSTANTIATE_TEST_SUITE_P(smoke_DFT_3d,
+                         DFTLayerTest,
+                         combine({{10, 4, 2}, {1, 17, 2}},  // input shapes
+                                 {{0, 1}, {-1, -2}},        // axes
+                                 {{}, {5, 2}}),             // signal sizes
+                         DFTLayerTest::getTestCaseName);
+INSTANTIATE_TEST_SUITE_P(smoke_DFT_4d,
+                         DFTLayerTest,
+                         combine({{10, 4, 8, 2}, {1, 17, 12, 2}},  // input shapes
+                                 {{0, 1, 2}, {-1, -2, -3}},        // axes
+                                 {{}, {5, 2, 5}}),                 // signal sizes
+                         DFTLayerTest::getTestCaseName);
+INSTANTIATE_TEST_SUITE_P(smoke_DFT_5d,
+                         DFTLayerTest,
+                         combine({{10, 4, 8, 2, 2}, {1, 17, 12, 1, 2}},  // input shapes
+                                 {{0, 1, 2, 3}, {-1, -2, -3, -4}},       // axes
+                                 {{}, {5, 2, 5, 20}}),                   // signal sizes
+                         DFTLayerTest::getTestCaseName);
+INSTANTIATE_TEST_SUITE_P(smoke_DFT_6d,
+                         DFTLayerTest,
+                         combine({{10, 4, 8, 2, 5, 2}, {1, 17, 12, 1, 7, 2}},  // input shapes
+                                 {{0, 1, 2, 3, 4}, {-1, -2, -3, -4, -5}},      // axes
+                                 {{}, {5, 2, 5, 20, 10}}),                     // signal sizes
+                         DFTLayerTest::getTestCaseName);
+INSTANTIATE_TEST_SUITE_P(smoke_DFT_6d_zero,
+                         DFTLayerTest,
+                         combine({{10, 4, 8, 2, 5, 2}, {1, 17, 12, 1, 7, 2}},  // input shapes
+                                 {{}},                                         // axes
+                                 {{}}),                                        // signal sizes
+                         DFTLayerTest::getTestCaseName);
+
+}  // namespace