[IE CLDNN] Implement EmbeddingBag operations (#623)

Implemented three operations: EmbeddingBagPackedSum,
EmbeddingBagOffsetsSum and EmbeddingSegmentsSum. These operations do
the same work but have a different format of inputs.
This commit is contained in:
Egor Churaev 2020-06-04 10:25:28 +03:00 committed by GitHub
parent e53b1b7fbc
commit 546377dc8e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 2394 additions and 2 deletions

View File

@ -63,6 +63,7 @@
#include <api/grn.hpp> #include <api/grn.hpp>
#include <api/ctc_greedy_decoder.hpp> #include <api/ctc_greedy_decoder.hpp>
#include <api/cum_sum.hpp> #include <api/cum_sum.hpp>
#include <api/embedding_bag.hpp>
#include <chrono> #include <chrono>
#include <cmath> #include <cmath>
@ -598,6 +599,9 @@ Program::LayerType Program::LayerTypeFromStr(const std::string &str) {
{ "CTCGreedyDecoder", CTCGreedyDecoder }, { "CTCGreedyDecoder", CTCGreedyDecoder },
{ "PriorBoxClustered", PriorBoxClustered }, { "PriorBoxClustered", PriorBoxClustered },
{ "CumSum", CumSum }, { "CumSum", CumSum },
{ "EmbeddingBagPackedSum", EmbeddingBagPackedSum },
{ "EmbeddingBagOffsetsSum", EmbeddingBagOffsetsSum },
{ "EmbeddingSegmentsSum", EmbeddingSegmentsSum },
}; };
auto it = LayerNameToType.find(str); auto it = LayerNameToType.find(str);
if (it != LayerNameToType.end()) if (it != LayerNameToType.end())
@ -1280,6 +1284,12 @@ void Program::CreateSingleLayerPrimitive(cldnn::topology& topology, InferenceEng
break; break;
case CumSum: CreateCumSumPrimitive(topology, layer); case CumSum: CreateCumSumPrimitive(topology, layer);
break; break;
case EmbeddingBagPackedSum: CreateEmbeddingBagPackedSumPrimitive(topology, layer);
break;
case EmbeddingBagOffsetsSum: CreateEmbeddingBagOffsetsSumPrimitive(topology, layer);
break;
case EmbeddingSegmentsSum: CreateEmbeddingSegmentsSumPrimitive(topology, layer);
break;
default: THROW_CLDNN_EXCEPTION("Unknown Layer Type: " << layer->type); default: THROW_CLDNN_EXCEPTION("Unknown Layer Type: " << layer->type);
} }
} }
@ -4487,6 +4497,95 @@ void Program::CreatePriorBoxClusteredPrimitive(cldnn::topology& topology, Infere
AddPrimitiveToProfiler(priorBoxLayerName, layer); AddPrimitiveToProfiler(priorBoxLayerName, layer);
} }
// Converts an EmbeddingBagPackedSum IE layer into a cldnn::embedding_bag primitive.
// Expected inputs: {emb_table, indices} or {emb_table, indices, per_sample_weights}.
void Program::CreateEmbeddingBagPackedSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer) {
    ValidateLayer(layer, {2, 3});

    const auto inputs = GetPrevLayersPrimitives(layer);
    const auto genericLayer = as<InferenceEngine::GenericLayer*>(layer);
    const auto primitiveName = layer_type_name_ID(layer);

    // The output shape is taken directly from the layer's first output tensor.
    const auto outputDims = genericLayer->outData[0]->getTensorDesc().getDims();
    topology.add(cldnn::embedding_bag(primitiveName,
                                      inputs,
                                      cldnn::embedding_bag::packed_sum,
                                      CldnnTensorFromIEDims(outputDims)));
    AddPrimitiveToProfiler(primitiveName, layer);
}
// Converts an EmbeddingBagOffsetsSum IE layer into a cldnn::embedding_bag primitive.
// Inputs: emb_table, indices, offsets[, default_index][, per_sample_weights].
// A constant scalar "default_index" input is folded into the primitive's
// default_index field and removed from the input list.
void Program::CreateEmbeddingBagOffsetsSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer) {
    ValidateLayer(layer, {3, 4, 5});
    auto inputPrimitives = GetPrevLayersPrimitives(layer);
    auto embeddingBag = as<InferenceEngine::GenericLayer*>(layer);

    int32_t defaultIndex = -1;
    if (inputPrimitives.size() > 3) {
        // "default_index" must be produced by a constant layer holding a single I32 scalar.
        auto defaultIndexInput = layer->insData[3].lock();
        auto defaultIndexInputCreator = defaultIndexInput->getCreatorLayer().lock();
        if (defaultIndexInputCreator->blobs.size() == 1) {
            auto constantBlob = defaultIndexInputCreator->blobs.begin()->second;
            auto defaultIndexPrecision = constantBlob->getTensorDesc().getPrecision();
            if (defaultIndexPrecision == InferenceEngine::Precision::I32) {
                auto data = constantBlob->buffer().as<int32_t*>();
                defaultIndex = data[0];
            } else {
                // Fixed: message previously read "EmbeddingBagOfsetsSum" and lacked a separator.
                THROW_IE_EXCEPTION << layer->name << ": Incorrect EmbeddingBagOffsetsSum default_index precision";
            }
        }
        inputPrimitives.erase(inputPrimitives.begin() + 3);  // Remove "default_index"
    }

    auto layerName = layer_type_name_ID(layer);
    auto embeddingBagPrim = cldnn::embedding_bag(
        layerName,
        inputPrimitives,
        cldnn::embedding_bag::offsets_sum,
        CldnnTensorFromIEDims(embeddingBag->outData[0]->getTensorDesc().getDims()),
        defaultIndex);

    topology.add(embeddingBagPrim);
    AddPrimitiveToProfiler(layerName, layer);
}
// Converts an EmbeddingSegmentsSum IE layer into a cldnn::embedding_bag primitive.
// Inputs: emb_table, indices, segment_ids, num_segments[, default_index][, per_sample_weights].
// "num_segments" is already encoded in the output shape, so its input is dropped;
// a constant scalar "default_index" is folded into the primitive's default_index field.
void Program::CreateEmbeddingSegmentsSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer) {
    ValidateLayer(layer, {4, 5, 6});
    auto inputPrimitives = GetPrevLayersPrimitives(layer);
    auto embeddingBag = as<InferenceEngine::GenericLayer*>(layer);

    inputPrimitives.erase(inputPrimitives.begin() + 3);  // Remove "num_segments"

    int32_t defaultIndex = -1;
    if (inputPrimitives.size() > 3) {
        // "default_index" must be produced by a constant layer holding a single I32 scalar.
        // Note: insData still contains "num_segments", so default_index is at position 4.
        auto defaultIndexInput = layer->insData[4].lock();
        auto defaultIndexInputCreator = defaultIndexInput->getCreatorLayer().lock();
        if (defaultIndexInputCreator->blobs.size() == 1) {
            auto constantBlob = defaultIndexInputCreator->blobs.begin()->second;
            auto defaultIndexPrecision = constantBlob->getTensorDesc().getPrecision();
            if (defaultIndexPrecision == InferenceEngine::Precision::I32) {
                auto data = constantBlob->buffer().as<int32_t*>();
                defaultIndex = data[0];
            } else {
                // Fixed: message previously named the wrong operation ("EmbeddingBagOfsetsSum").
                THROW_IE_EXCEPTION << layer->name << ": Incorrect EmbeddingSegmentsSum default_index precision";
            }
        }
        inputPrimitives.erase(inputPrimitives.begin() + 3);  // Remove "default_index"
    }

    auto layerName = layer_type_name_ID(layer);
    auto embeddingBagPrim = cldnn::embedding_bag(
        layerName,
        inputPrimitives,
        cldnn::embedding_bag::segments_sum,
        CldnnTensorFromIEDims(embeddingBag->outData[0]->getTensorDesc().getDims()),
        defaultIndex);

    topology.add(embeddingBagPrim);
    AddPrimitiveToProfiler(layerName, layer);
}
bool Program::IsValidSplitConvMerge(const InferenceEngine::SplitLayer *splitLayer) const { bool Program::IsValidSplitConvMerge(const InferenceEngine::SplitLayer *splitLayer) const {
if (splitLayer->outData.size() != 2) return false; // split into 2 if (splitLayer->outData.size() != 2) return false; // split into 2

View File

@ -217,6 +217,9 @@ public:
CTCGreedyDecoder, CTCGreedyDecoder,
PriorBoxClustered, PriorBoxClustered,
CumSum, CumSum,
EmbeddingBagPackedSum,
EmbeddingBagOffsetsSum,
EmbeddingSegmentsSum,
NO_TYPE NO_TYPE
}; };
using GenericBlobMap = std::map<cldnn::primitive_id, cldnn::primitive_id>; using GenericBlobMap = std::map<cldnn::primitive_id, cldnn::primitive_id>;
@ -373,6 +376,9 @@ private:
void CreateCTCGreedyDecoderPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer); void CreateCTCGreedyDecoderPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
void CreatePriorBoxClusteredPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer); void CreatePriorBoxClusteredPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
void CreateCumSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer); void CreateCumSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
void CreateEmbeddingBagPackedSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
void CreateEmbeddingBagOffsetsSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
void CreateEmbeddingSegmentsSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
}; };
} // namespace CLDNNPlugin } // namespace CLDNNPlugin

View File

@ -0,0 +1,59 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "single_layer_tests/embedding_bag_offsets_sum.hpp"
#include <vector>
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
// Embedding-table (network) precisions to cover on GPU.
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16
};
// Precisions for the integer "indices"/"offsets" inputs.
const std::vector<InferenceEngine::Precision> indPrecisions = {
InferenceEngine::Precision::I64,
InferenceEngine::Precision::I32
};
// Embedding table shapes: 2-D and 3-D variants.
const std::vector<std::vector<size_t>> emb_table_shape = {
{5, 6},
{10, 35},
{5, 4, 16}
};
// Flat index vectors into the embedding table (values < table dim 0).
const std::vector<std::vector<size_t>> indices = {
{0, 1, 2, 2, 3},
{4, 4, 3, 1, 0},
{1, 2, 1, 2, 1, 2, 1, 2, 1, 2}
};
// Bag start offsets; includes a case with empty bags ({0, 0, 2, 2}).
const std::vector<std::vector<size_t>> offsets = {
{0, 2},
{0, 0, 2, 2},
{2, 4}
};
// Row substituted for empty bags when with_default_index is true.
const std::vector<size_t> default_index = {0, 4};
const std::vector<bool> with_weights = {false, true};
const std::vector<bool> with_default_index = {false, true};
// Cartesian product of all operation-specific parameters.
const auto embBagOffsetSumArgSet = ::testing::Combine(
::testing::ValuesIn(emb_table_shape), ::testing::ValuesIn(indices),
::testing::ValuesIn(offsets), ::testing::ValuesIn(default_index),
::testing::ValuesIn(with_weights), ::testing::ValuesIn(with_default_index));
INSTANTIATE_TEST_CASE_P(
smoke, EmbeddingBagOffsetsSumLayerTest,
::testing::Combine(embBagOffsetSumArgSet,
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(indPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
EmbeddingBagOffsetsSumLayerTest::getTestCaseName);
} // namespace

View File

@ -0,0 +1,49 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "single_layer_tests/embedding_bag_packed_sum.hpp"
#include <vector>
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
// Embedding-table (network) precisions to cover on GPU.
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16,
};
// Precisions for the integer "indices" input.
const std::vector<InferenceEngine::Precision> indPrecisions = {
InferenceEngine::Precision::I64,
InferenceEngine::Precision::I32
};
// Embedding table shapes: 2-D and 3-D variants.
const std::vector<std::vector<size_t>> emb_table_shape = {
{5, 6},
{10, 35},
{5, 4, 16}
};
// Packed indices: one fixed-length row of table indices per bag.
const std::vector<std::vector<std::vector<size_t>>> indices = {
{{0, 1}, {2, 2}, {3, 4}},
{{4, 4, 3}, {1, 0, 2}},
{{1, 2, 1, 2}, {1, 2, 1, 2}}
};
const std::vector<bool> with_weights = {false, true};
// Cartesian product of all operation-specific parameters.
const auto embBagPackedSumArgSet = ::testing::Combine(
::testing::ValuesIn(emb_table_shape), ::testing::ValuesIn(indices),
::testing::ValuesIn(with_weights));
INSTANTIATE_TEST_CASE_P(
smoke, EmbeddingBagPackedSumLayerTest,
::testing::Combine(embBagPackedSumArgSet,
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(indPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
EmbeddingBagPackedSumLayerTest::getTestCaseName);
} // namespace

View File

@ -0,0 +1,54 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "single_layer_tests/embedding_segments_sum.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
// Embedding-table (network) precisions to cover on GPU.
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16
};
// Precisions for the integer "indices"/"segment_ids" inputs.
const std::vector<InferenceEngine::Precision> indPrecisions = {
InferenceEngine::Precision::I64,
InferenceEngine::Precision::I32
};
// Embedding table shapes: 2-D and 3-D variants.
const std::vector<std::vector<size_t>> emb_table_shape = {
{5, 6},
{10, 35},
{5, 4, 16}
};
// Flat index vectors into the embedding table (values < table dim 0).
const std::vector<std::vector<size_t>> indices = {
{0, 1, 2, 2, 3},
{4, 4, 3, 1, 2}
};
// Segment id per index; {0, 0, 2, 2, 4} leaves segments 1 and 3 empty.
const std::vector<std::vector<size_t>> segment_ids = {
{0, 1, 2, 3, 4},
{0, 0, 2, 2, 4}
};
// num_segments = 7 exercises trailing empty segments as well.
const std::vector<size_t> num_segments = {5, 7};
// Row substituted for empty segments when with_default_index is true.
const std::vector<size_t> default_index = {0, 4};
const std::vector<bool> with_weights = {false, true};
const std::vector<bool> with_default_index = {false, true};
// Cartesian product of all operation-specific parameters.
const auto embSegmentsSumArgSet = ::testing::Combine(
::testing::ValuesIn(emb_table_shape), ::testing::ValuesIn(indices),
::testing::ValuesIn(segment_ids), ::testing::ValuesIn(num_segments),
::testing::ValuesIn(default_index), ::testing::ValuesIn(with_weights),
::testing::ValuesIn(with_default_index));
INSTANTIATE_TEST_CASE_P(
smoke, EmbeddingSegmentsSumLayerTest,
::testing::Combine(embSegmentsSumArgSet, ::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(indPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
EmbeddingSegmentsSumLayerTest::getTestCaseName);
} // namespace

View File

@ -0,0 +1,64 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Computes sums of "bags" of embeddings, without instantiating the intermediate embeddings.
/// @details For each index in `indices` this operator gets values from `data` embedding table and sums all values belonging to each bag.
struct embedding_bag : public primitive_base<embedding_bag> {
CLDNN_DECLARE_PRIMITIVE(embedding_bag)
/// @brief Select type of embedding_bag operation
enum embedding_bag_type {
packed_sum,   ///< bags come as fixed-size rows of a dense indices matrix
offsets_sum,  ///< bags are delimited by an extra "offsets" input
segments_sum  ///< bags are selected by an extra "segment_ids" input
};
/// @brief Constructs embedding_bag primitive.
/// @param id This primitive id.
/// @param inputs Vector with different inputs.
/// @param type Which EmbeddingBag flavor the inputs follow (packed/offsets/segments).
/// @param output_shape Tensor with shape of output layout
/// @param default_index default index in embedding table to fill empty "bags";
///        -1 (the default) disables the substitution.
/// @param output_padding Optional padding applied to the output layout.
embedding_bag(const primitive_id& id,
const std::vector<primitive_id>& inputs,
const embedding_bag_type& type,
const tensor& output_shape,
const int32_t default_index = -1,
const padding& output_padding = padding())
: primitive_base(id, inputs, output_padding), type(type), output_shape(output_shape), default_index(default_index) {}
/// @brief Type of EmbeddingBag operation
embedding_bag_type type;
/// @brief Shape of output layout
tensor output_shape;
/// @brief Default index
int32_t default_index;
};
/// @}
/// @}
/// @}
} // namespace cldnn

View File

@ -82,7 +82,8 @@ enum class KernelType {
SPACE_TO_DEPTH, SPACE_TO_DEPTH,
GRN, GRN,
CTC_GREEDY_DECODER, CTC_GREEDY_DECODER,
CUM_SUM CUM_SUM,
EMBEDDING_BAG
}; };
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -532,4 +533,13 @@ enum class CumSumAxis {
FEATURE, FEATURE,
BATCH BATCH
}; };
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// EmbeddingBagType
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
enum class EmbeddingBagType {
PACKED_SUM,
OFFSETS_SUM,
SEGMENTS_SUM
};
} // namespace kernel_selector } // namespace kernel_selector

View File

@ -0,0 +1,140 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "embedding_bag_kernel_ref.h"
#include "kernel_selector_utils.h"
#include <string>
#include <vector>
namespace kernel_selector {
// Builds the JIT defines that select which kernel variant in the .cl file gets compiled.
JitConstants EmbeddingBagKernelRef::GetJitConstants(const embedding_bag_params& params) const {
    auto jit = MakeBaseParamsJitConstants(params);

    // Exactly one of PACKED_SUM / OFFSETS_SUM / SEGMENTS_SUM is defined, matching
    // the #ifdef-guarded kernel bodies in embedding_bag_ref.cl.
    if (params.type == EmbeddingBagType::PACKED_SUM) {
        jit.AddConstant(MakeJitConstant("PACKED_SUM", 1));
    } else if (params.type == EmbeddingBagType::OFFSETS_SUM) {
        jit.AddConstant(MakeJitConstant("OFFSETS_SUM", 1));
    } else if (params.type == EmbeddingBagType::SEGMENTS_SUM) {
        jit.AddConstant(MakeJitConstant("SEGMENTS_SUM", 1));
    }

    // A non-negative default_index turns on the empty-bag fallback path in the kernel.
    if (params.default_index > -1) {
        jit.AddConstant(MakeJitConstant("DEFAULT_INDEX", params.default_index));
    }

    return jit;
}
// Computes the dispatch geometry: one work-item per output element,
// laid out as (batch, feature, y*x flattened).
CommonDispatchData EmbeddingBagKernelRef::SetDefault(const embedding_bag_params& params) const {
    CommonDispatchData dispatchData;

    const std::vector<size_t> gws = { params.output.Batch().v,
                                      params.output.Feature().v,
                                      params.output.Y().v * params.output.X().v };
    const auto lws = GetOptimalLocalWorkGroupSizes(gws, params.engineInfo);

    dispatchData.gws0 = gws[0];
    dispatchData.gws1 = gws[1];
    dispatchData.gws2 = gws[2];
    dispatchData.lws0 = lws[0];
    dispatchData.lws1 = lws[1];
    dispatchData.lws2 = lws[2];

    return dispatchData;
}
// Produces a single reference kernel for the given embedding_bag parameters,
// or an empty list if the configuration is unsupported.
KernelsData EmbeddingBagKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
    // Validate first so no KernelData is allocated for unsupported configurations
    // (the original allocated before validating).
    if (!Validate(params, options)) {
        return {};
    }

    KernelData kd = KernelData::Default<embedding_bag_params>(params);
    embedding_bag_params& newParams = *static_cast<embedding_bag_params*>(kd.params.get());

    auto runInfo = SetDefault(newParams);
    auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
    auto cldnn_jit = GetJitConstants(newParams);
    std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);

    auto& kernel = kd.kernels[0];
    FillCLKernelData(kernel,
                     runInfo,
                     params.engineInfo,
                     kernelName,
                     jit,
                     entry_point,
                     "",
                     false,
                     false,
                     (uint32_t)newParams.inputs.size());  // input count varies (2..4) by op flavor

    kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;

    return { kd };
}
// Declares which data types / layouts this reference kernel supports.
ParamsKey EmbeddingBagKernelRef::GetSupportedKey() const {
ParamsKey k;
// Embedding table is fp; index-like inputs may be 32/64-bit integers.
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableInputDataType(Datatype::INT32);
k.EnableInputDataType(Datatype::INT64);
k.EnableInputDataType(Datatype::UINT32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
// NOTE(review): "bfxy" looks like a typo for "bfyx", and the call is redundant
// anyway given EnableAllInputLayout() just below — confirm before removing.
k.EnableInputLayout(DataLayout::bfxy);
k.EnableAllInputLayout();
k.EnableAllOutputLayout();
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBatching();
k.EnableDifferentTypes();
return k;
}
// Checks that the parameters describe a supported embedding_bag configuration.
// Returns false for a mismatched kernel type or non-32-bit index inputs.
bool EmbeddingBagKernelRef::Validate(const Params& p, const optional_params& o) const {
    // Both params and optional params must target the EMBEDDING_BAG kernel type.
    if (p.GetType() != KernelType::EMBEDDING_BAG ||
        o.GetType() != KernelType::EMBEDDING_BAG) {
        return false;
    }

    const embedding_bag_params& params = static_cast<const embedding_bag_params&>(p);

    // The kernel addresses the table with 32-bit indices only.
    auto checkIntType = [](Datatype dt) {
        return dt == Datatype::INT32 || dt == Datatype::UINT32;
    };

    // Guard against short input lists before indexing (robustness; callers
    // normally provide at least {table, indices}).
    if (params.inputs.size() < 2 || !checkIntType(params.inputs[1].GetDType()))
        return false;

    // offsets / segment_ids (input 2) must also be 32-bit integers.
    if (params.type == EmbeddingBagType::OFFSETS_SUM || params.type == EmbeddingBagType::SEGMENTS_SUM) {
        if (params.inputs.size() < 3 || !checkIntType(params.inputs[2].GetDType()))
            return false;
    }

    return true;
}
} // namespace kernel_selector

View File

@ -0,0 +1,58 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "common_kernel_base.h"
#include "kernel_selector_params.h"
namespace kernel_selector {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// embedding_bag_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Parameters of the embedding_bag kernel.
struct embedding_bag_params : public base_params {
embedding_bag_params() : base_params(KernelType::EMBEDDING_BAG), type(EmbeddingBagType::PACKED_SUM), default_index(-1) {}
// Which EmbeddingBag flavor to compile (packed / offsets / segments).
EmbeddingBagType type;
// Table row used for empty bags; -1 disables the fallback.
int32_t default_index;
virtual ParamsKey GetParamsKey() const { return base_params::GetParamsKey(); }
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// embedding_bag_optional_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Optional parameters of the embedding_bag kernel (none beyond the base set).
struct embedding_bag_optional_params : optional_params {
embedding_bag_optional_params() : optional_params(KernelType::EMBEDDING_BAG) {}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// EmbeddingBagKernelRef
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Reference (non-optimized) OpenCL implementation of the embedding_bag primitive.
class EmbeddingBagKernelRef : public common_kernel_base {
public:
EmbeddingBagKernelRef() : common_kernel_base("embedding_bag_ref") {}
virtual ~EmbeddingBagKernelRef() = default;
protected:
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
// Emits the *_SUM / DEFAULT_INDEX JIT defines selecting the kernel variant.
virtual JitConstants GetJitConstants(const embedding_bag_params& params) const;
// One work-item per output element.
virtual CommonDispatchData SetDefault(const embedding_bag_params& params) const;
virtual bool Validate(const Params&, const optional_params&) const override;
};
} // namespace kernel_selector

View File

@ -0,0 +1,29 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "embedding_bag_kernel_selector.h"
#include "embedding_bag_kernel_ref.h"
namespace kernel_selector {
// Registers all available embedding_bag kernel implementations
// (currently only the reference kernel).
embedding_bag_kernel_selector::embedding_bag_kernel_selector() {
Attach<EmbeddingBagKernelRef>();
}
// Picks the best registered kernel for the given parameters.
KernelsData embedding_bag_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
return GetNaiveBestKernel(params, options, KernelType::EMBEDDING_BAG);
}
} // namespace kernel_selector

View File

@ -0,0 +1,35 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "kernel_selector.h"
namespace kernel_selector {
// Selector that chooses an embedding_bag kernel implementation.
class embedding_bag_kernel_selector : public kernel_selector_base {
public:
// Meyers-singleton accessor; selector state is shared process-wide.
static embedding_bag_kernel_selector& Instance() {
static embedding_bag_kernel_selector instance_;
return instance_;
}
embedding_bag_kernel_selector();
virtual ~embedding_bag_kernel_selector() {}
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};
} // namespace kernel_selector

View File

@ -0,0 +1,148 @@
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "include/include_all.cl"
#ifdef PACKED_SUM
// Packed variant: indices is a dense [batch, bag_size] matrix; every bag sums
// the same number of table rows. One work-item per output element:
// gid0 = batch (bag), gid1 = feature, gid2 = y*x flattened.
KERNEL(embedding_bag_ref)(
const __global INPUT0_TYPE* emb_table,
const __global INPUT1_TYPE* indices,
#ifdef INPUT2_TYPE
const __global INPUT2_TYPE* weights,
#endif
__global OUTPUT_TYPE* output)
{
const uint batch = get_global_id(0);
const uint emb_dim1 = get_global_id(1);
const uint emb_dim2 = (uint)get_global_id(2) / OUTPUT_SIZE_X;
const uint emb_dim3 = (uint)get_global_id(2) % OUTPUT_SIZE_X;
OUTPUT_TYPE res = OUTPUT_VAL_ZERO;
// Accumulate every table row referenced by this bag's row of `indices`.
for (int i = 0; i < INPUT1_FEATURE_NUM; ++i)
{
uint indices_index = INPUT1_GET_INDEX(batch, i, 0, 0);
uint emb_index = INPUT0_GET_INDEX(indices[indices_index], emb_dim1, emb_dim2, emb_dim3);
OUTPUT_TYPE val = emb_table[emb_index];
#ifdef INPUT2_TYPE
// Optional per-sample weights share the indices' [batch, bag_size] shape.
{
uint weight_index = INPUT2_GET_INDEX(batch, i, 0, 0);
val *= weights[weight_index];
}
#endif
res += val;
}
uint out_ind = OUTPUT_GET_INDEX(batch, emb_dim1, emb_dim2, emb_dim3);
output[out_ind] = ACTIVATION(TO_OUTPUT_TYPE(res), ACTIVATION_PARAMS);
}
#endif // PACKED_SUM
#ifdef OFFSETS_SUM
// Offsets variant: bag `b` covers indices[offsets[b] .. offsets[b+1]); the last
// bag runs to the end of `indices`. One work-item per output element.
KERNEL(embedding_bag_ref)(
const __global INPUT0_TYPE* emb_table,
const __global INPUT1_TYPE* indices,
const __global INPUT2_TYPE* offsets,
#ifdef INPUT3_TYPE
const __global INPUT3_TYPE* weights,
#endif
__global OUTPUT_TYPE* output)
{
const uint batch = get_global_id(0);
const uint emb_dim1 = get_global_id(1);
const uint emb_dim2 = (uint)get_global_id(2) / OUTPUT_SIZE_X;
const uint emb_dim3 = (uint)get_global_id(2) % OUTPUT_SIZE_X;
uint offsets_ind = INPUT2_OFFSET + batch;
uint start_indices = INPUT1_OFFSET + offsets[offsets_ind];
offsets_ind = INPUT2_OFFSET + batch + 1;
uint end_indices = (batch < OUTPUT_BATCH_NUM - 1) ?
INPUT1_OFFSET + offsets[offsets_ind] :
INPUT1_LENGTH;
OUTPUT_TYPE res = OUTPUT_VAL_ZERO;
for (int i = start_indices; i < end_indices; ++i)
{
// NOTE(review): `i` already starts at start_indices, which includes
// INPUT1_OFFSET, so adding INPUT1_OFFSET again looks double-counted
// unless INPUT1_OFFSET == 0 — verify for padded/offset input layouts.
uint indices_index = INPUT1_OFFSET + i;
uint emb_index = INPUT0_GET_INDEX(indices[indices_index], emb_dim1, emb_dim2, emb_dim3);
OUTPUT_TYPE val = emb_table[emb_index];
#ifdef INPUT3_TYPE
// Per-sample weights are indexed in step with `indices`.
{
uint weight_index = INPUT3_OFFSET + i;
val *= weights[weight_index];
}
#endif
res += val;
}
#ifdef DEFAULT_INDEX
// Empty bag: substitute the DEFAULT_INDEX row of the table (unweighted).
if (start_indices == end_indices) {
{
uint emb_index = INPUT0_GET_INDEX(DEFAULT_INDEX, emb_dim1, emb_dim2, emb_dim3);
res = emb_table[emb_index];
}
}
#endif
uint out_ind = OUTPUT_GET_INDEX(batch, emb_dim1, emb_dim2, emb_dim3);
output[out_ind] = ACTIVATION(TO_OUTPUT_TYPE(res), ACTIVATION_PARAMS);
}
#endif // OFFSETS_SUM
#ifdef SEGMENTS_SUM
// Segments variant: indices[i] belongs to bag segment_ids[i]. The scan relies
// on segment_ids being sorted ascending (the loop breaks once id > batch).
// One work-item per output element.
KERNEL(embedding_bag_ref)(
const __global INPUT0_TYPE* emb_table,
const __global INPUT1_TYPE* indices,
const __global INPUT2_TYPE* segment_ids,
#ifdef INPUT3_TYPE
const __global INPUT3_TYPE* weights,
#endif
__global OUTPUT_TYPE* output)
{
const uint batch = get_global_id(0);
const uint emb_dim1 = get_global_id(1);
const uint emb_dim2 = (uint)get_global_id(2) / OUTPUT_SIZE_X;
const uint emb_dim3 = (uint)get_global_id(2) % OUTPUT_SIZE_X;
OUTPUT_TYPE res = OUTPUT_VAL_ZERO;
// `found` tracks whether this segment received any contribution.
bool found = false;
for (int i = 0; i < INPUT2_LENGTH; ++i) {
uint id = segment_ids[INPUT2_OFFSET + i];
if (id > batch)
break;
if (id == batch) {
found = true;
uint index = indices[INPUT1_OFFSET + i];
uint emb_index = INPUT0_GET_INDEX(index, emb_dim1, emb_dim2, emb_dim3);
OUTPUT_TYPE val = emb_table[emb_index];
#ifdef INPUT3_TYPE
// Per-sample weights are indexed in step with `indices`.
{
uint weight_index = INPUT3_OFFSET + i;
val *= weights[weight_index];
}
#endif
res += val;
}
}
#ifdef DEFAULT_INDEX
// Empty segment: substitute the DEFAULT_INDEX row of the table (unweighted).
if (!found) {
uint emb_index = INPUT0_GET_INDEX(DEFAULT_INDEX, emb_dim1, emb_dim2, emb_dim3);
res = emb_table[emb_index];
}
#endif
uint out_ind = OUTPUT_GET_INDEX(batch, emb_dim1, emb_dim2, emb_dim3);
output[out_ind] = ACTIVATION(TO_OUTPUT_TYPE(res), ACTIVATION_PARAMS);
}
#endif // SEGMENTS_SUM

View File

@ -0,0 +1,70 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "embedding_bag_inst.h"
#include "primitive_type_base.h"
#include "error_handler.h"
#include "json_object.h"
#include <string>
namespace cldnn {
// Returns the singleton type descriptor for the embedding_bag primitive.
primitive_type_id embedding_bag::type_id() {
static primitive_type_base<embedding_bag> instance;
return &instance;
}
// Output layout keeps the data type and format of the embedding table (input 0);
// the shape comes straight from the primitive descriptor.
layout embedding_bag_inst::calc_output_layout(embedding_bag_node const& node) {
    auto prim = node.get_primitive();
    auto table_layout = node.input(0).get_output_layout();
    return layout(table_layout.data_type, table_layout.format, prim->output_shape);
}
// Produces a human-readable JSON description of the node for logs/dumps.
std::string embedding_bag_inst::to_string(embedding_bag_node const& node) {
    auto prim = node.get_primitive();
    auto node_info = node.desc_to_json();

    json_composite embedding_bag_info;
    embedding_bag_info.add("input id", node.input().id());
    switch (prim->type) {
    case embedding_bag::packed_sum:
        embedding_bag_info.add("embedding bag type", "PackedSum");
        break;
    case embedding_bag::offsets_sum:
        embedding_bag_info.add("embedding bag type", "OffsetsSum");
        break;
    case embedding_bag::segments_sum:
        embedding_bag_info.add("embedding bag type", "SegmentsSum");
        break;
    }
    node_info->add("embedding_bag info", embedding_bag_info);

    std::stringstream description;
    node_info->dump(description);
    return description.str();
}
// Instance constructor: no extra state beyond the generic primitive instance.
embedding_bag_inst::typed_primitive_inst(network_impl& network, embedding_bag_node const& node)
: parent(network, node) {}
} // namespace cldnn

View File

@ -0,0 +1,85 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "embedding_bag_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "kernel_selector_helper.h"
#include "embedding_bag/embedding_bag_kernel_selector.h"
#include "embedding_bag/embedding_bag_kernel_ref.h"
#include "error_handler.h"
#include "data_inst.h"
using namespace cldnn;
namespace cldnn {
namespace gpu {
// GPU implementation wrapper: translates the cldnn embedding_bag primitive
// into kernel_selector parameters and picks the best available kernel.
struct embedding_bag_gpu : typed_primitive_gpu_impl<embedding_bag> {
using parent = typed_primitive_gpu_impl<embedding_bag>;
using parent::parent;
public:
static primitive_impl* create(const embedding_bag_node& arg) {
auto embedding_bag_params = get_default_params<kernel_selector::embedding_bag_params>(arg);
auto embedding_bag_optional_params =
get_default_optional_params<kernel_selector::embedding_bag_optional_params>(arg.get_program());
// Map the primitive's operation kind onto the kernel_selector enum.
switch (arg.get_primitive()->type) {
case embedding_bag::packed_sum:
embedding_bag_params.type = kernel_selector::EmbeddingBagType::PACKED_SUM;
break;
case embedding_bag::offsets_sum:
embedding_bag_params.type = kernel_selector::EmbeddingBagType::OFFSETS_SUM;
break;
case embedding_bag::segments_sum:
embedding_bag_params.type = kernel_selector::EmbeddingBagType::SEGMENTS_SUM;
break;
default:
CLDNN_ERROR_MESSAGE(arg.id(), "Unknown EmbeddingBag type");
break;
}
// Append the remaining inputs (indices/offsets/weights); input 0 is presumably
// already covered by get_default_params — TODO confirm against its definition.
for (size_t i = 1; i < arg.inputs_count(); i++) {
embedding_bag_params.inputs.push_back(convert_data_tensor(arg.input(i).get_output_layout()));
}
embedding_bag_params.default_index = arg.get_primitive()->default_index;
auto& kernel_selector = kernel_selector::embedding_bag_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(embedding_bag_params, embedding_bag_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
// Ownership of the raw pointer passes to the caller (implementation_map convention).
auto embedding_bag = new embedding_bag_gpu(arg, best_kernels[0]);
return embedding_bag;
}
};
namespace detail {
// Registers the OCL embedding_bag implementation for both supported
// floating-point precisions (f32 first, then f16 — same order as before).
attach_embedding_bag_gpu::attach_embedding_bag_gpu() {
    auto impl = embedding_bag_gpu::create;
    for (auto dt : {data_types::f32, data_types::f16}) {
        implementation_map<embedding_bag>::add(std::make_tuple(engine_types::ocl, dt, format::bfyx), impl);
    }
}
} // namespace detail
} // namespace gpu
} // namespace cldnn

View File

@ -98,6 +98,7 @@ void register_implementations_gpu() {
REGISTER_GPU(grn); REGISTER_GPU(grn);
REGISTER_GPU(ctc_greedy_decoder); REGISTER_GPU(ctc_greedy_decoder);
REGISTER_GPU(cum_sum); REGISTER_GPU(cum_sum);
REGISTER_GPU(embedding_bag);
} }
} // namespace gpu } // namespace gpu

View File

@ -175,6 +175,7 @@ REGISTER_GPU(non_max_suppression);
REGISTER_GPU(grn); REGISTER_GPU(grn);
REGISTER_GPU(ctc_greedy_decoder); REGISTER_GPU(ctc_greedy_decoder);
REGISTER_GPU(cum_sum); REGISTER_GPU(cum_sum);
REGISTER_GPU(embedding_bag);
#undef REGISTER_GPU #undef REGISTER_GPU

View File

@ -50,6 +50,7 @@
#include "shuffle_channels_inst.h" #include "shuffle_channels_inst.h"
#include "strided_slice_inst.h" #include "strided_slice_inst.h"
#include "cum_sum_inst.h" #include "cum_sum_inst.h"
#include "embedding_bag_inst.h"
#include <vector> #include <vector>
#include <list> #include <list>
#include <memory> #include <memory>
@ -200,7 +201,8 @@ void prepare_primitive_fusing::fuse_activations(program_impl &p) {
!input.is_type<softmax>() && !input.is_type<resample>() && !input.is_type<mvn>() && !input.is_type<softmax>() && !input.is_type<resample>() && !input.is_type<mvn>() &&
!input.is_type<depth_to_space>() && !input.is_type<gather>() && !input.is_type<reverse_sequence>() && !input.is_type<depth_to_space>() && !input.is_type<gather>() && !input.is_type<reverse_sequence>() &&
!input.is_type<shuffle_channels>() && !input.is_type<strided_slice>() && !input.is_type<cum_sum>() && !input.is_type<shuffle_channels>() && !input.is_type<strided_slice>() && !input.is_type<cum_sum>() &&
!input.is_type<fused_conv_eltwise>() && !input.is_type<activation>())) !input.is_type<embedding_bag>() && !input.is_type<fused_conv_eltwise>() &&
!input.is_type<activation>()))
return; return;
if (input.is_type<eltwise>()) { if (input.is_type<eltwise>()) {

View File

@ -0,0 +1,49 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "api/embedding_bag.hpp"
#include "primitive_inst.h"
#include <string>
namespace cldnn {
// Program-node specialization for embedding_bag: exposes typed access to the
// node's dependencies (input 0 is the embedding table; the remaining inputs
// are indices/offsets/segment ids and, optionally, per-sample weights).
template <>
struct typed_program_node<embedding_bag> : public typed_program_node_base<embedding_bag> {
using parent = typed_program_node_base<embedding_bag>;
public:
using parent::parent;
// Returns the dependency node at the given input slot (defaults to the first input).
program_node& input(size_t index = 0) const { return get_dependency(index); }
// Total number of inputs wired to this node.
size_t inputs_count() const { return get_dependencies().size(); }
};
// Convenience alias used throughout the GPU plugin.
using embedding_bag_node = typed_program_node<embedding_bag>;
// Primitive-instance specialization for embedding_bag; the static helpers are
// defined in the corresponding .cpp (output layout inference and debug dump).
template <>
class typed_primitive_inst<embedding_bag> : public typed_primitive_inst_base<embedding_bag> {
using parent = typed_primitive_inst_base<embedding_bag>;
public:
// Computes the output layout for the given node (implementation elsewhere).
static layout calc_output_layout(embedding_bag_node const& node);
// Human-readable description of the node for logging/debugging.
static std::string to_string(embedding_bag_node const& node);
typed_primitive_inst(network_impl& network, embedding_bag_node const& desc);
};
// Convenience alias used throughout the GPU plugin.
using embedding_bag_inst = typed_primitive_inst<embedding_bag>;
} // namespace cldnn

File diff suppressed because it is too large Load Diff