[IE CLDNN] Implement ExtractImagePatches operation (#1127)

The ExtractImagePatches operation collects patches from the input tensor, as if applying a convolution. All extracted patches are stacked in the depth dimension of the output. JIRA: 30055
2020-06-29 00:36:30 -07:00 · 2020-06-29 00:36:30 -07:00 · 08cd0f7779
commit 08cd0f7779
parent d0be6b1d2f
20 changed files with 1303 additions and 7 deletions
--- a/inference-engine/src/cldnn_engine/cldnn_program.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_program.cpp
@ -66,6 +66,7 @@
 #include <api/ctc_greedy_decoder.hpp>
 #include <api/cum_sum.hpp>
 #include <api/embedding_bag.hpp>
+#include <api/extract_image_patches.hpp>

 #include <chrono>
 #include <cmath>
@ -605,6 +606,7 @@ Program::LayerType Program::LayerTypeFromStr(const std::string &str) {
        { "EmbeddingBagPackedSum", EmbeddingBagPackedSum },
        { "EmbeddingBagOffsetsSum", EmbeddingBagOffsetsSum },
        { "EmbeddingSegmentsSum", EmbeddingSegmentsSum },
+        { "ExtractImagePatches" , ExtractImagePatches },
    };
    auto it = LayerNameToType.find(str);
    if (it != LayerNameToType.end())
@ -1297,6 +1299,8 @@ void Program::CreateSingleLayerPrimitive(cldnn::topology& topology, InferenceEng
            break;
        case EmbeddingSegmentsSum: CreateEmbeddingSegmentsSumPrimitive(topology, layer);
            break;
+        case ExtractImagePatches: CreateExtractImagePatchesPrimitive(topology, layer);
+            break;
        default: THROW_CLDNN_EXCEPTION("Unknown Layer Type: " << layer->type);
    }
 }
@ -4889,6 +4893,32 @@ void Program::CreateEmbeddingSegmentsSumPrimitive(cldnn::topology& topology, Inf
    AddPrimitiveToProfiler(layerName, layer);
 }

+void Program::CreateExtractImagePatchesPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer) {
+    ValidateLayer(layer, 1);
+
+    auto inputPrimitives = GetPrevLayersPrimitives(layer);
+    auto eipLayer = as<InferenceEngine::GenericLayer*>(layer);
+
+    std::vector<unsigned int> sizes = eipLayer->GetParamAsUInts("sizes");
+    std::vector<unsigned int> strides = eipLayer->GetParamAsUInts("strides");
+    std::vector<unsigned int> rates = eipLayer->GetParamAsUInts("rates");
+    std::string auto_pad = eipLayer->GetParamAsString("auto_pad");
+
+    std::string eipLayerName = layer_type_name_ID(layer);
+
+    auto extractImagePatchesPrim = cldnn::extract_image_patches(
+        eipLayerName,
+        inputPrimitives[0],
+        sizes,
+        strides,
+        rates,
+        auto_pad,
+        CldnnTensorFromIEDims(eipLayer->outData[0]->getTensorDesc().getDims()));
+
+    topology.add(extractImagePatchesPrim);
+    AddPrimitiveToProfiler(eipLayerName, layer);
+}
+
 bool Program::IsValidSplitConvMerge(const InferenceEngine::SplitLayer *splitLayer) const {
    if (splitLayer->outData.size() != 2) return false;  // split into 2

--- a/inference-engine/src/cldnn_engine/cldnn_program.h
+++ b/inference-engine/src/cldnn_engine/cldnn_program.h
@ -221,6 +221,7 @@ public:
        EmbeddingBagPackedSum,
        EmbeddingBagOffsetsSum,
        EmbeddingSegmentsSum,
+        ExtractImagePatches,
        NO_TYPE
    };
    using GenericBlobMap = std::map<cldnn::primitive_id, cldnn::primitive_id>;
@ -382,6 +383,7 @@ private:
    void CreateEmbeddingBagPackedSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
    void CreateEmbeddingBagOffsetsSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
    void CreateEmbeddingSegmentsSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
+    void CreateExtractImagePatchesPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
 };

 }  // namespace CLDNNPlugin
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/extract_image_patches.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/extract_image_patches.cpp
@ -0,0 +1,70 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/extract_image_patches.hpp"
+
+using namespace LayerTestsDefinitions;
+using ngraph::op::PadType;
+
+namespace {
+
+const std::vector<std::vector<size_t>> inDataShape = {
+    {1, 1, 10, 10},
+    {1, 3, 10, 10}
+};
+const std::vector<std::vector<size_t>> kernels = {
+    {2, 2},
+    {3, 3},
+    {4, 4},
+    {1, 3},
+    {4, 2}
+};
+const std::vector<std::vector<size_t>> strides = {
+    {3, 3},
+    {5, 5},
+    {9, 9},
+    {1, 3},
+    {6, 2}
+};
+const std::vector<std::vector<size_t>> rates = {
+    {1, 1},
+    {1, 2},
+    {2, 1},
+    {2, 2}
+};
+const std::vector<PadType> autoPads = {
+    PadType::VALID,
+    PadType::SAME_UPPER,
+    PadType::SAME_LOWER
+};
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+    //InferenceEngine::Precision::I8,
+    InferenceEngine::Precision::U8,
+    InferenceEngine::Precision::I16,
+    InferenceEngine::Precision::I32,
+    InferenceEngine::Precision::FP32
+};
+
+const auto extractImagePatchesParamsSet = ::testing::Combine(
+        ::testing::ValuesIn(inDataShape),
+        ::testing::ValuesIn(kernels),
+        ::testing::ValuesIn(strides),
+        ::testing::ValuesIn(rates),
+        ::testing::ValuesIn(autoPads)
+);
+
+INSTANTIATE_TEST_CASE_P(layers_GPU, ExtractImagePatchesTest,
+        ::testing::Combine(
+            ::testing::ValuesIn(inDataShape),
+            ::testing::ValuesIn(kernels),
+            ::testing::ValuesIn(strides),
+            ::testing::ValuesIn(rates),
+            ::testing::ValuesIn(autoPads),
+            ::testing::ValuesIn(netPrecisions),
+            ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+        ExtractImagePatchesTest::getTestCaseName);
+
+}  // namespace
--- a/inference-engine/thirdparty/clDNN/api/extract_image_patches.hpp
+++ b/inference-engine/thirdparty/clDNN/api/extract_image_patches.hpp
@ -0,0 +1,79 @@
+/*
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+#include "primitive.hpp"
+
+namespace cldnn {
+/// @addtogroup cpp_api C++ API
+/// @{
+/// @addtogroup cpp_topology Network Topology
+/// @{
+/// @addtogroup cpp_primitives Primitives
+/// @{
+
+/// @brief The ExtractImagePatches operation collects patches from the input tensor, as if applying a convolution. All extracted patches are stacked in the depth dimension of the output.
+/// @details The ExtractImagePatches operation is similar to the TensorFlow*
+/// operation ExtractImagePatches.
+/// This op extracts patches of shape `sizes` which are `strides` apart in the
+/// input image. The output elements are taken from the input at intervals
+/// given by the `rate` argument, as in dilated convolutions.
+/// The result is a 4D tensor containing image patches with size
+/// `size[0] * size[1] * depth` vectorized in the "depth" dimension.
+/// The "auto_pad" attribute has no effect on the size of each patch, it
+/// determines how many patches are extracted.
+struct extract_image_patches : public primitive_base<extract_image_patches> {
+    CLDNN_DECLARE_PRIMITIVE(extract_image_patches)
+
+    /// @brief Constructs select primitive.
+    /// @param id This primitive id.
+    /// @param input Input primitive id containing input 4-D tensor.
+    /// @param sizes Vector with sizes.
+    /// @param strides Vector with strides.
+    /// @param rates Vector with rates.
+    /// @param auto_pad How the padding is calculated.
+    /// @param output_shape Tensor with shape of output layout
+    extract_image_patches(const primitive_id& id,
+                          const primitive_id& input,
+                          const std::vector<unsigned int>& sizes,
+                          const std::vector<unsigned int>& strides,
+                          const std::vector<unsigned int>& rates,
+                          const std::string& auto_pad,
+                          const tensor& output_shape,
+                          const padding& output_padding = padding())
+        : primitive_base(id, {input}, output_padding),
+          sizes(sizes),
+          strides(strides),
+          rates(rates),
+          auto_pad(auto_pad),
+          output_shape(output_shape) {}
+
+    /// @brief Vector with sizes
+    std::vector<unsigned int> sizes;
+    /// @brief Vector with strides
+    std::vector<unsigned int> strides;
+    /// @brief Vector with rates
+    std::vector<unsigned int> rates;
+    /// @brief Mode how the padding is calculated
+    std::string auto_pad;
+    /// @brief Shape of output layout
+    tensor output_shape;
+};
+/// @}
+/// @}
+/// @}
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h
@ -85,7 +85,8 @@ enum class KernelType {
    GRN,
    CTC_GREEDY_DECODER,
    CUM_SUM,
-    EMBEDDING_BAG
+    EMBEDDING_BAG,
+    EXTRACT_IMAGE_PATCHES
 };

 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_base.cpp
@ -0,0 +1,108 @@
+/*
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "extract_image_patches_kernel_base.h"
+#include <vector>
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+ParamsKey ExtractImagePatchesKernelBase::GetSupportedKey() const {
+    ParamsKey k;
+
+    k.EnableAllInputDataType();
+    k.EnableAllOutputDataType();
+    k.EnableInputLayout(DataLayout::bfyx);
+    k.EnableOutputLayout(DataLayout::bfyx);
+
+    k.EnableTensorOffset();
+    k.EnableTensorPitches();
+    k.EnableBatching();
+    return k;
+}
+
+JitConstants ExtractImagePatchesKernelBase::GetJitConstants(const extract_image_patches_params& params) const {
+    JitConstants jit = MakeBaseParamsJitConstants(params);
+
+    jit.AddConstants({
+        MakeJitConstant("SIZE_ROWS", params.sizes[0]),
+        MakeJitConstant("SIZE_COLS", params.sizes[1]),
+        MakeJitConstant("STRIDE_ROWS", params.strides[0]),
+        MakeJitConstant("STRIDE_COLS", params.strides[1]),
+        MakeJitConstant("RATES_ROWS", params.rates[0]),
+        MakeJitConstant("RATES_COLS", params.rates[1]),
+    });
+    if (params.auto_pad == "same_upper")
+        jit.AddConstant(MakeJitConstant("AUTO_PAD", 1));
+    else if (params.auto_pad == "same_lower")
+        jit.AddConstant(MakeJitConstant("AUTO_PAD", 2));
+
+    return jit;
+}
+
+ExtractImagePatchesKernelBase::DispatchData ExtractImagePatchesKernelBase::SetDefault(const extract_image_patches_params& params) const {
+    DispatchData kd;
+
+    std::vector<size_t> global = { params.output.Batch().v,
+                                   params.output.Feature().v,
+                                   params.output.Y().v * params.output.X().v };
+
+    const auto& local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
+
+    kd.gws0 = global[0];
+    kd.gws1 = global[1];
+    kd.gws2 = global[2];
+
+    kd.lws0 = local[0];
+    kd.lws1 = local[1];
+    kd.lws2 = local[2];
+
+    return kd;
+}
+
+KernelsData ExtractImagePatchesKernelBase::GetCommonKernelsData(const Params& params,
+                                                                const optional_params& options,
+                                                                float estimated_time) const {
+    if (!Validate(params, options)) {
+        return KernelsData();
+    }
+
+    const auto& prim_params = static_cast<const extract_image_patches_params&>(params);
+
+    auto run_info = SetDefault(prim_params);
+    KernelData kd = KernelData::Default<extract_image_patches_params>(params);
+
+    auto cldnn_jit = GetJitConstants(prim_params);
+    auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
+    auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+    auto& kernel = kd.kernels[0];
+    FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point);
+
+    kd.estimatedTime = estimated_time;
+
+    return {kd};
+}
+
+bool ExtractImagePatchesKernelBase::Validate(const Params& p, const optional_params&) const {
+    const extract_image_patches_params& params = static_cast<const extract_image_patches_params&>(p);
+
+    if (params.GetType() != KernelType::EXTRACT_IMAGE_PATCHES) {
+        return false;
+    }
+
+    return true;
+}
+}  // namespace kernel_selector
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_base.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_base.h
@ -0,0 +1,58 @@
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "common_kernel_base.h"
+#include "kernel_selector_params.h"
+
+#include <vector>
+
+namespace kernel_selector {
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// extract_image_patches_params
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct extract_image_patches_params : public base_params {
+    extract_image_patches_params() : base_params(KernelType::EXTRACT_IMAGE_PATCHES) {}
+
+    std::vector<unsigned int> sizes;
+    std::vector<unsigned int> strides;
+    std::vector<unsigned int> rates;
+    std::string auto_pad;
+
+    virtual ParamsKey GetParamsKey() const { return base_params::GetParamsKey(); }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// extract_image_patches_optional_params
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct extract_image_patches_optional_params : optional_params {
+    extract_image_patches_optional_params() : optional_params(KernelType::EXTRACT_IMAGE_PATCHES) {}
+};
+
+class ExtractImagePatchesKernelBase : public common_kernel_base {
+public:
+    using common_kernel_base::common_kernel_base;
+    using DispatchData = CommonDispatchData;
+    virtual ~ExtractImagePatchesKernelBase() {}
+
+protected:
+    virtual ParamsKey GetSupportedKey() const override;
+    virtual JitConstants GetJitConstants(const extract_image_patches_params& params) const;
+    DispatchData SetDefault(const extract_image_patches_params& params) const;
+    KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const;
+
+    bool Validate(const Params& p, const optional_params&) const override;
+};
+}  // namespace kernel_selector
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_ref.cpp
@ -0,0 +1,26 @@
+/*
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "extract_image_patches_kernel_ref.h"
+#include "kernel_selector_utils.h"
+#include <string>
+#include <vector>
+
+namespace kernel_selector {
+KernelsData ExtractImagePatchesKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
+    return GetCommonKernelsData(params, options, DONT_USE_IF_HAVE_SOMETHING_ELSE);
+}
+}  // namespace kernel_selector
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_ref.h
@ -0,0 +1,29 @@
+/*
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "extract_image_patches_kernel_base.h"
+
+namespace kernel_selector {
+class ExtractImagePatchesKernelRef : public ExtractImagePatchesKernelBase {
+public:
+    ExtractImagePatchesKernelRef() : ExtractImagePatchesKernelBase("extract_image_patches_ref") {}
+    virtual ~ExtractImagePatchesKernelRef() = default;
+protected:
+    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+};
+}  // namespace kernel_selector
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_selector.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_selector.cpp
@ -0,0 +1,26 @@
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "extract_image_patches_kernel_selector.h"
+#include "extract_image_patches_kernel_ref.h"
+
+namespace kernel_selector {
+extract_image_patches_kernel_selector::extract_image_patches_kernel_selector() {
+    Attach<ExtractImagePatchesKernelRef>();
+}
+
+KernelsData extract_image_patches_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
+    return GetNaiveBestKernel(params, options, KernelType::EXTRACT_IMAGE_PATCHES);
+}
+}  // namespace kernel_selector
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_selector.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_selector.h
@ -0,0 +1,32 @@
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "kernel_selector.h"
+
+namespace kernel_selector {
+class extract_image_patches_kernel_selector : public kernel_selector_base {
+public:
+    static extract_image_patches_kernel_selector& Instance() {
+        static extract_image_patches_kernel_selector instance_;
+        return instance_;
+    }
+
+    extract_image_patches_kernel_selector();
+    virtual ~extract_image_patches_kernel_selector() = default;
+
+    KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+};
+}  // namespace kernel_selector
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/cum_sum_ref.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/cum_sum_ref.cl
@ -44,9 +44,9 @@ inline uint FUNC(get_output_index)(uint b, uint f, uint w, uint z, uint y, uint

 KERNEL(cum_sum_ref)( const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output)
 {
-    const uint batch = get_global_id(0);
-    const uint features = get_global_id(1) / OUTPUT_SIZE_W;
-    const uint w = get_global_id(1) % OUTPUT_SIZE_W;
+    const uint batch = (uint)get_global_id(0);
+    const uint features = (uint)get_global_id(1) / OUTPUT_SIZE_W;
+    const uint w = (uint)get_global_id(1) % OUTPUT_SIZE_W;
    const uint yx = (uint)get_global_id(2) % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
    const uint z = (uint)get_global_id(2) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
    const uint y = yx / OUTPUT_SIZE_X;
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/extract_image_patches_ref.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/extract_image_patches_ref.cl
@ -0,0 +1,64 @@
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/include_all.cl"
+
+KERNEL(extract_image_patches_ref)(const __global INPUT0_TYPE* input,
+                                  __global OUTPUT_TYPE* output)
+{
+    const uint batch = (uint)get_global_id(0);
+    const uint out_depth = (uint)get_global_id(1);
+    const uint out_row = (uint)get_global_id(2) / OUTPUT_SIZE_X;
+    const uint out_col = (uint)get_global_id(2) % OUTPUT_SIZE_X;
+
+    int row_padding = 0;
+    int col_padding = 0;
+#ifdef AUTO_PAD
+    uint num_out_rows = OUTPUT_SIZE_Y * STRIDE_ROWS + (SIZE_ROWS * RATES_ROWS - STRIDE_ROWS);
+#if RATES_ROWS > 1
+    --num_out_rows;
+#endif // RATES_ROWS > 1
+    const int row_padding_size = max((int)(num_out_rows - INPUT0_SIZE_Y), 0);
+    uint num_out_cols = OUTPUT_SIZE_X * STRIDE_COLS + (SIZE_COLS * RATES_COLS - STRIDE_COLS);
+#if RATES_COLS > 1
+    --num_out_cols;
+#endif // RATES_COLS > 1
+    const int col_padding_size = max((int)(num_out_cols - INPUT0_SIZE_X), 0);
+    row_padding = row_padding_size / 2;
+    col_padding = col_padding_size / 2;
+#if AUTO_PAD == 2 // same_lower
+    row_padding = (row_padding_size % 2) + row_padding;
+    col_padding = (col_padding_size % 2) + col_padding;
+#endif // AUTO_PAD == 2
+#endif // AUTO_PAD
+
+    const uint cur_row_ind = out_depth / (INPUT0_FEATURE_NUM * SIZE_COLS);
+    const uint row = cur_row_ind +
+               STRIDE_ROWS * out_row - row_padding +
+               (RATES_ROWS - 1) * cur_row_ind;
+    const uint cur_col_ind = (out_depth % (INPUT0_FEATURE_NUM * SIZE_COLS)) / INPUT0_FEATURE_NUM;
+    const uint col = cur_col_ind +
+               STRIDE_COLS * out_col - col_padding +
+               (RATES_COLS - 1) * cur_col_ind;
+
+    const uint depth = out_depth % INPUT0_FEATURE_NUM;
+    const uint in_ind = INPUT0_GET_INDEX_SAFE(batch, depth, row, col);
+    const uint out_ind = OUTPUT_GET_INDEX(batch, out_depth, out_row, out_col);
+    OUTPUT_TYPE res = TO_OUTPUT_TYPE(input[in_ind]);
+#ifdef AUTO_PAD
+    if (row < 0 || col < 0 || row >= INPUT0_SIZE_Y || col >= INPUT0_SIZE_X)
+        res = OUTPUT_VAL_ZERO;
+#endif
+    output[out_ind] = ACTIVATION(res, ACTIVATION_PARAMS);
+}
--- a/inference-engine/thirdparty/clDNN/src/extract_image_patches.cpp
+++ b/inference-engine/thirdparty/clDNN/src/extract_image_patches.cpp
@ -0,0 +1,69 @@
+/*
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "extract_image_patches_inst.h"
+
+#include "primitive_type_base.h"
+#include "error_handler.h"
+#include "json_object.h"
+#include <string>
+
+namespace cldnn {
+primitive_type_id extract_image_patches::type_id() {
+    static primitive_type_base<extract_image_patches> instance;
+    return &instance;
+}
+
+layout extract_image_patches_inst::calc_output_layout(extract_image_patches_node const& node) {
+    auto desc = node.get_primitive();
+
+    auto input_layout = node.input(0).get_output_layout();
+    auto input_format = input_layout.format;
+
+    auto output_shape = desc->output_shape;
+    return layout(input_layout.data_type, input_format, output_shape);
+}
+
+std::string extract_image_patches_inst::to_string(extract_image_patches_node const& node) {
+    auto desc = node.get_primitive();
+    auto node_info = node.desc_to_json();
+    auto& input = node.input();
+
+    std::stringstream primitive_description;
+
+    std::stringstream sizes, strides, rates;
+    sizes << desc->sizes[0] << "," << desc->sizes[1];
+    strides << desc->strides[0] << "," << desc->strides[1];
+    rates << desc->rates[0] << "," << desc->rates[1];
+
+    json_composite extract_image_patches_info;
+    extract_image_patches_info.add("input id", input.id());
+    extract_image_patches_info.add("input shape", input.get_output_layout().size.to_string());
+    extract_image_patches_info.add("sizes", sizes.str());
+    extract_image_patches_info.add("strides", strides.str());
+    extract_image_patches_info.add("rates", rates.str());
+    extract_image_patches_info.add("auto_pad", desc->auto_pad);
+    extract_image_patches_info.add("output shape", input.calc_output_layout().size.to_string());
+
+    node_info->add("extract_image_patches info", extract_image_patches_info);
+    node_info->dump(primitive_description);
+
+    return primitive_description.str();
+}
+
+extract_image_patches_inst::typed_primitive_inst(network_impl& network, extract_image_patches_node const& node) : parent(network, node) {}
+
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/gpu/extract_image_patches_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/extract_image_patches_gpu.cpp
@ -0,0 +1,72 @@
+/*
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "extract_image_patches_inst.h"
+#include "primitive_gpu_base.h"
+#include "implementation_map.h"
+#include "error_handler.h"
+#include "kernel_selector_helper.h"
+
+#include "extract_image_patches/extract_image_patches_kernel_selector.h"
+#include "extract_image_patches/extract_image_patches_kernel_ref.h"
+
+namespace cldnn {
+namespace gpu {
+
+struct extract_image_patches_gpu : typed_primitive_gpu_impl<extract_image_patches> {
+    using parent = typed_primitive_gpu_impl<extract_image_patches>;
+    using parent::parent;
+
+public:
+    static primitive_impl* create(const extract_image_patches_node& arg) {
+        auto params = get_default_params<kernel_selector::extract_image_patches_params>(arg);
+        auto optional_params =
+            get_default_optional_params<kernel_selector::extract_image_patches_optional_params>(arg.get_program());
+
+        params.sizes = arg.get_primitive()->sizes;
+        params.strides = arg.get_primitive()->strides;
+        params.rates = arg.get_primitive()->rates;
+        params.auto_pad = arg.get_primitive()->auto_pad;
+
+        auto& kernel_selector = kernel_selector::extract_image_patches_kernel_selector::Instance();
+        auto best_kernels = kernel_selector.GetBestKernels(params, optional_params);
+
+        CLDNN_ERROR_BOOL(arg.id(),
+                         "Best_kernel.empty()",
+                         best_kernels.empty(),
+                         "Cannot find a proper kernel with this arguments");
+
+        auto extract_image_patches = new extract_image_patches_gpu(arg, best_kernels[0]);
+
+        return extract_image_patches;
+    }
+};
+
+namespace detail {
+
+attach_extract_image_patches_gpu::attach_extract_image_patches_gpu() {
+    implementation_map<extract_image_patches>::add(
+        {{std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), extract_image_patches_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), extract_image_patches_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), extract_image_patches_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), extract_image_patches_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), extract_image_patches_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), extract_image_patches_gpu::create}});
+}
+
+}  // namespace detail
+}  // namespace gpu
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.cpp
@ -101,6 +101,7 @@ void register_implementations_gpu() {
    REGISTER_GPU(ctc_greedy_decoder);
    REGISTER_GPU(cum_sum);
    REGISTER_GPU(embedding_bag);
+    REGISTER_GPU(extract_image_patches);
 }

 }  // namespace gpu
--- a/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.hpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.hpp
@ -180,6 +180,7 @@ REGISTER_GPU(grn);
 REGISTER_GPU(ctc_greedy_decoder);
 REGISTER_GPU(cum_sum);
 REGISTER_GPU(embedding_bag);
+REGISTER_GPU(extract_image_patches);

 #undef REGISTER_GPU

--- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp
@ -53,6 +53,7 @@
 #include "strided_slice_inst.h"
 #include "cum_sum_inst.h"
 #include "embedding_bag_inst.h"
+#include "extract_image_patches_inst.h"
 #include <vector>
 #include <list>
 #include <memory>
@ -201,9 +202,10 @@ void prepare_primitive_fusing::fuse_activations(program_impl &p) {
                 !input.is_type<permute>() && !input.is_type<pooling>() && !input.is_type<reorder>() &&
                 !input.is_type<reshape>() && !input.is_type<roi_pooling>() && !input.is_type<scale>() &&
                 !input.is_type<softmax>() && !input.is_type<resample>() && !input.is_type<mvn>() &&
-                 !input.is_type<depth_to_space>() && !input.is_type<batch_to_space>() && !input.is_type<space_to_batch>() &&
-                 !input.is_type<gather>() && !input.is_type<shuffle_channels>() && !input.is_type<strided_slice>() &&
-                 !input.is_type<cum_sum>() && !input.is_type<reverse_sequence>() && !input.is_type<embedding_bag>() &&
+                 !input.is_type<depth_to_space>() && !input.is_type<batch_to_space>() &&
+                 !input.is_type<space_to_batch>() && !input.is_type<gather>() && !input.is_type<shuffle_channels>() &&
+                 !input.is_type<strided_slice>() && !input.is_type<cum_sum>() && !input.is_type<reverse_sequence>() &&
+                 !input.is_type<embedding_bag>() && !input.is_type<extract_image_patches>() &&
                 !input.is_type<fused_conv_eltwise>() && !input.is_type<activation>()))
                return;

--- a/inference-engine/thirdparty/clDNN/src/include/extract_image_patches_inst.h
+++ b/inference-engine/thirdparty/clDNN/src/include/extract_image_patches_inst.h
@ -0,0 +1,49 @@
+/*
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+
+#include "api/extract_image_patches.hpp"
+#include "primitive_inst.h"
+
+namespace cldnn {
+template <>
+struct typed_program_node<extract_image_patches> : public typed_program_node_base<extract_image_patches> {
+    using parent = typed_program_node_base<extract_image_patches>;
+
+public:
+    using parent::parent;
+
+    program_node& input(size_t index = 0) const { return get_dependency(index); }
+};
+
+using extract_image_patches_node = typed_program_node<extract_image_patches>;
+
+template <>
+class typed_primitive_inst<extract_image_patches> : public typed_primitive_inst_base<extract_image_patches> {
+    using parent = typed_primitive_inst_base<extract_image_patches>;
+
+public:
+    static layout calc_output_layout(extract_image_patches_node const& node);
+    static std::string to_string(extract_image_patches_node const& node);
+
+public:
+    typed_primitive_inst(network_impl& network, extract_image_patches_node const& desc);
+};
+
+using extract_image_patches_inst = typed_primitive_inst<extract_image_patches>;
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/extract_image_patches_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/extract_image_patches_gpu_test.cpp
@ -0,0 +1,577 @@
+/*
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include <gtest/gtest.h>
+
+#include <api/memory.hpp>
+#include <api/input_layout.hpp>
+#include <api/extract_image_patches.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/data.hpp>
+
+#include <test_utils/test_utils.h>
+
+using namespace cldnn;
+using namespace tests;
+
+TEST(extract_image_patches_gpu, basic) {
+    //  Input  : 1x1x10x10
+    //  Output : 1x9x2x2
+
+    tensor output_shape = {1, 9, 2, 2};
+    const auto& engine = get_test_engine();
+    auto batch = 1;
+    auto depth = 1;
+    auto in_rows = 10;
+    auto in_cols = 10;
+    std::vector<unsigned int> sizes = {3, 3};
+    std::vector<unsigned int> strides = {5, 5};
+    std::vector<unsigned int> rates = {1, 1};
+    std::string auto_pad = "valid";
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } });
+
+    std::vector<float> inputVals(batch * depth * in_rows * in_cols);
+    std::generate(inputVals.begin(), inputVals.end(), []() {
+        static float n = 1;
+        return n++;
+    });
+
+    set_values(input, inputVals);
+
+    topology topology;
+    topology.add(input_layout("Input0", input.get_layout()));
+    topology.add(extract_image_patches("extract_image_patches", "Input0", sizes, strides, rates, auto_pad, output_shape));
+
+    network network(engine, topology);
+    network.set_input_data("Input0", input);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "extract_image_patches");
+
+    auto output = outputs.at("extract_image_patches").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> answers = {
+         1,  6,
+        51, 56,
+
+         2,  7,
+        52, 57,
+
+         3,  8,
+        53, 58,
+
+        11, 16,
+        61, 66,
+
+        12, 17,
+        62, 67,
+
+        13, 18,
+        63, 68,
+
+        21, 26,
+        71, 76,
+
+        22, 27,
+        72, 77,
+
+        23, 28,
+        73, 78
+    };
+
+    ASSERT_EQ(answers.size(), output_ptr.size());
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i;
+    }
+}
+
+TEST(extract_image_patches_gpu, basic2) {
+    //  Input  : 1x1x10x10
+    //  Output : 1x16x1x1
+
+    const auto& engine = get_test_engine();
+    auto batch = 1;
+    auto depth = 1;
+    auto in_rows = 10;
+    auto in_cols = 10;
+    std::vector<unsigned int> sizes = {4, 4};
+    std::vector<unsigned int> strides = {8, 8};
+    std::vector<unsigned int> rates = {1, 1};
+    std::string auto_pad = "valid";
+    tensor output_shape = {1, 16, 1, 1};
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } });
+
+    std::vector<float> inputVals(batch * depth * in_rows * in_cols);
+    std::generate(inputVals.begin(), inputVals.end(), []() {
+        static float n = 1;
+        return n++;
+    });
+
+    set_values(input, inputVals);
+
+    topology topology;
+    topology.add(input_layout("Input0", input.get_layout()));
+    topology.add(extract_image_patches("extract_image_patches", "Input0", sizes, strides, rates, auto_pad, output_shape));
+
+    network network(engine, topology);
+    network.set_input_data("Input0", input);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "extract_image_patches");
+
+    auto output = outputs.at("extract_image_patches").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> answers = {
+         1,
+         2,
+         3,
+         4,
+        11,
+        12,
+        13,
+        14,
+        21,
+        22,
+        23,
+        24,
+        31,
+        32,
+        33,
+        34
+    };
+
+    ASSERT_EQ(answers.size(), output_ptr.size());
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i;
+    }
+}
+
+TEST(extract_image_patches_gpu, basic3) {
+    //  Input  : 1x1x10x10
+    //  Output : 1x16x2x2
+
+    const auto& engine = get_test_engine();
+    auto batch = 1;
+    auto depth = 1;
+    auto in_rows = 10;
+    auto in_cols = 10;
+    std::vector<unsigned int> sizes = {4, 4};
+    std::vector<unsigned int> strides = {9, 9};
+    std::vector<unsigned int> rates = {1, 1};
+    std::string auto_pad = "same_upper";
+    tensor output_shape = {1, 16, 2, 2};
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } });
+
+    std::vector<float> inputVals(batch * depth * in_rows * in_cols);
+    std::generate(inputVals.begin(), inputVals.end(), []() {
+        static float n = 1;
+        return n++;
+    });
+
+    set_values(input, inputVals);
+
+    topology topology;
+    topology.add(input_layout("Input0", input.get_layout()));
+    topology.add(extract_image_patches("extract_image_patches", "Input0", sizes, strides, rates, auto_pad, output_shape));
+
+    network network(engine, topology);
+    network.set_input_data("Input0", input);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "extract_image_patches");
+
+    auto output = outputs.at("extract_image_patches").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> answers = {
+         0,   0,
+         0,  89,
+
+         0,   0,
+        81,  90,
+
+         0,   0,
+        82,   0,
+
+         0,   0,
+        83,   0,
+
+         0,   9,
+         0,  99,
+
+         1,  10,
+        91, 100,
+
+         2,   0,
+        92,   0,
+
+         3,   0,
+        93,   0,
+
+         0,  19,
+         0,   0,
+
+        11,  20,
+         0,   0,
+
+        12,   0,
+         0,   0,
+
+        13,   0,
+         0,   0,
+
+         0,  29,
+         0,   0,
+
+        21,  30,
+         0,   0,
+
+        22,   0,
+         0,   0,
+
+        23,   0,
+         0,   0,
+    };
+
+    ASSERT_EQ(answers.size(), output_ptr.size());
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i;
+    }
+}
+
+TEST(extract_image_patches_gpu, basic3_same_lower) {
+    //  Input  : 1x1x10x10
+    //  Output : 1x16x2x2
+
+    const auto& engine = get_test_engine();
+    auto batch = 1;
+    auto depth = 1;
+    auto in_rows = 10;
+    auto in_cols = 10;
+    std::vector<unsigned int> sizes = {4, 4};
+    std::vector<unsigned int> strides = {9, 9};
+    std::vector<unsigned int> rates = {1, 1};
+    std::string auto_pad = "same_lower";
+    tensor output_shape = {1, 16, 2, 2};
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } });
+
+    std::vector<float> inputVals(batch * depth * in_rows * in_cols);
+    std::generate(inputVals.begin(), inputVals.end(), []() {
+        static float n = 1;
+        return n++;
+    });
+
+    set_values(input, inputVals);
+
+    topology topology;
+    topology.add(input_layout("Input0", input.get_layout()));
+    topology.add(extract_image_patches("extract_image_patches", "Input0", sizes, strides, rates, auto_pad, output_shape));
+
+    network network(engine, topology);
+    network.set_input_data("Input0", input);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "extract_image_patches");
+
+    auto output = outputs.at("extract_image_patches").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> answers = {
+         0,   0,
+         0,  78,
+
+         0,   0,
+         0,  79,
+
+         0,   0,
+        71,  80,
+
+         0,   0,
+        72,   0,
+
+         0,   0,
+         0,  88,
+
+         0,   0,
+         0,  89,
+
+         0,   0,
+        81,  90,
+
+         0,   0,
+        82,   0,
+
+         0,   8,
+         0,  98,
+
+         0,   9,
+         0,  99,
+
+         1,  10,
+        91, 100,
+
+         2,   0,
+        92,   0,
+
+         0,  18,
+         0,   0,
+
+         0,  19,
+         0,   0,
+
+        11,  20,
+         0,   0,
+
+        12,   0,
+         0,   0,
+    };
+
+    ASSERT_EQ(answers.size(), output_ptr.size());
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i;
+    }
+}
+
+TEST(extract_image_patches_gpu, basic3_enough_space) {
+    //  Input  : 1x1x10x10
+    //  Output : 1x9x2x2
+
+    const auto& engine = get_test_engine();
+    auto batch = 1;
+    auto depth = 1;
+    auto in_rows = 10;
+    auto in_cols = 10;
+    std::vector<unsigned int> sizes = {3, 3};
+    std::vector<unsigned int> strides = {7, 7};
+    std::vector<unsigned int> rates = {1, 1};
+    std::string auto_pad = "same_upper";
+    tensor output_shape = {1, 9, 2, 2};
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } });
+
+    std::vector<float> inputVals(batch * depth * in_rows * in_cols);
+    std::generate(inputVals.begin(), inputVals.end(), []() {
+        static float n = 1;
+        return n++;
+    });
+
+    set_values(input, inputVals);
+
+    topology topology;
+    topology.add(input_layout("Input0", input.get_layout()));
+    topology.add(extract_image_patches("extract_image_patches", "Input0", sizes, strides, rates, auto_pad, output_shape));
+
+    network network(engine, topology);
+    network.set_input_data("Input0", input);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "extract_image_patches");
+
+    auto output = outputs.at("extract_image_patches").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> answers = {
+         1,   8,
+        71,  78,
+
+         2,   9,
+        72,  79,
+
+         3,  10,
+        73,  80,
+
+        11,  18,
+        81,  88,
+
+        12,  19,
+        82,  89,
+
+        13,  20,
+        83,  90,
+
+        21,  28,
+        91,  98,
+
+        22,  29,
+        92,  99,
+
+        23,  30,
+        93, 100,
+    };
+
+    ASSERT_EQ(answers.size(), output_ptr.size());
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i;
+    }
+}
+
+TEST(extract_image_patches_gpu, basic4) {
+    //  Input  : 1x1x10x10
+    //  Output : 1x9x2x2
+
+    const auto& engine = get_test_engine();
+    auto batch = 1;
+    auto depth = 1;
+    auto in_rows = 10;
+    auto in_cols = 10;
+    std::vector<unsigned int> sizes = {3, 3};
+    std::vector<unsigned int> strides = {5, 5};
+    std::vector<unsigned int> rates = {2, 2};
+    std::string auto_pad = "valid";
+    tensor output_shape = {1, 9, 2, 2};
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } });
+
+    std::vector<float> inputVals(batch * depth * in_rows * in_cols);
+    std::generate(inputVals.begin(), inputVals.end(), []() {
+        static float n = 1;
+        return n++;
+    });
+
+    set_values(input, inputVals);
+
+    topology topology;
+    topology.add(input_layout("Input0", input.get_layout()));
+    topology.add(extract_image_patches("extract_image_patches", "Input0", sizes, strides, rates, auto_pad, output_shape));
+
+    network network(engine, topology);
+    network.set_input_data("Input0", input);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "extract_image_patches");
+
+    auto output = outputs.at("extract_image_patches").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> answers = {
+         1,   6,
+        51,  56,
+
+         3,   8,
+        53,  58,
+
+         5,  10,
+        55,  60,
+
+        21,  26,
+        71,  76,
+
+        23,  28,
+        73,  78,
+
+        25,  30,
+        75,  80,
+
+        41,  46,
+        91,  96,
+
+        43,  48,
+        93,  98,
+
+        45,  50,
+        95, 100
+    };
+
+    ASSERT_EQ(answers.size(), output_ptr.size());
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i;
+    }
+}
+
+TEST(extract_image_patches_gpu, basic5) {
+    //  Input  : 1x2x5x5
+    //  Output : 1x8x2x2
+
+    const auto& engine = get_test_engine();
+    auto batch = 1;
+    auto depth = 2;
+    auto in_rows = 5;
+    auto in_cols = 5;
+    std::vector<unsigned int> sizes = {2, 2};
+    std::vector<unsigned int> strides = {3, 3};
+    std::vector<unsigned int> rates = {1, 1};
+    std::string auto_pad = "valid";
+    tensor output_shape = {1, 8, 2, 2};
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } });
+
+    std::vector<float> inputVals(batch * depth * in_rows * in_cols);
+    std::generate(inputVals.begin(), inputVals.end(), []() {
+        static float n = 1;
+        return n++;
+    });
+
+    set_values(input, inputVals);
+
+    topology topology;
+    topology.add(input_layout("Input0", input.get_layout()));
+    topology.add(extract_image_patches("extract_image_patches", "Input0", sizes, strides, rates, auto_pad, output_shape));
+
+    network network(engine, topology);
+    network.set_input_data("Input0", input);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "extract_image_patches");
+
+    auto output = outputs.at("extract_image_patches").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> answers = {
+         1,  4,
+        16, 19,
+
+        26, 29,
+        41, 44,
+
+         2,  5,
+        17, 20,
+
+        27, 30,
+        42, 45,
+
+         6,  9,
+        21, 24,
+
+        31, 34,
+        46, 49,
+
+         7, 10,
+        22, 25,
+
+        32, 35,
+        47, 50
+    };
+
+    ASSERT_EQ(answers.size(), output_ptr.size());
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i;
+    }
+}