[IE CLDNN] Implement EmbeddingBag operations (#623)

Implemented three operations: EmbeddingBagPackedSum,
EmbeddingBagOffsetsSum and EmbeddingSegmentsSum. These operations do
the same work but have a different format of inputs.
This commit is contained in:
Egor Churaev 2020-06-04 10:25:28 +03:00 committed by GitHub
parent e53b1b7fbc
commit 546377dc8e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 2394 additions and 2 deletions

View File

@ -63,6 +63,7 @@
#include <api/grn.hpp> #include <api/grn.hpp>
#include <api/ctc_greedy_decoder.hpp> #include <api/ctc_greedy_decoder.hpp>
#include <api/cum_sum.hpp> #include <api/cum_sum.hpp>
#include <api/embedding_bag.hpp>
#include <chrono> #include <chrono>
#include <cmath> #include <cmath>
@ -598,6 +599,9 @@ Program::LayerType Program::LayerTypeFromStr(const std::string &str) {
{ "CTCGreedyDecoder", CTCGreedyDecoder }, { "CTCGreedyDecoder", CTCGreedyDecoder },
{ "PriorBoxClustered", PriorBoxClustered }, { "PriorBoxClustered", PriorBoxClustered },
{ "CumSum", CumSum }, { "CumSum", CumSum },
{ "EmbeddingBagPackedSum", EmbeddingBagPackedSum },
{ "EmbeddingBagOffsetsSum", EmbeddingBagOffsetsSum },
{ "EmbeddingSegmentsSum", EmbeddingSegmentsSum },
}; };
auto it = LayerNameToType.find(str); auto it = LayerNameToType.find(str);
if (it != LayerNameToType.end()) if (it != LayerNameToType.end())
@ -1280,6 +1284,12 @@ void Program::CreateSingleLayerPrimitive(cldnn::topology& topology, InferenceEng
break; break;
case CumSum: CreateCumSumPrimitive(topology, layer); case CumSum: CreateCumSumPrimitive(topology, layer);
break; break;
case EmbeddingBagPackedSum: CreateEmbeddingBagPackedSumPrimitive(topology, layer);
break;
case EmbeddingBagOffsetsSum: CreateEmbeddingBagOffsetsSumPrimitive(topology, layer);
break;
case EmbeddingSegmentsSum: CreateEmbeddingSegmentsSumPrimitive(topology, layer);
break;
default: THROW_CLDNN_EXCEPTION("Unknown Layer Type: " << layer->type); default: THROW_CLDNN_EXCEPTION("Unknown Layer Type: " << layer->type);
} }
} }
@ -4487,6 +4497,95 @@ void Program::CreatePriorBoxClusteredPrimitive(cldnn::topology& topology, Infere
AddPrimitiveToProfiler(priorBoxLayerName, layer); AddPrimitiveToProfiler(priorBoxLayerName, layer);
} }
// Converts an EmbeddingBagPackedSum IE layer into a cldnn::embedding_bag primitive.
// Expected inputs: {emb_table, indices} or {emb_table, indices, per_sample_weights}.
void Program::CreateEmbeddingBagPackedSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer) {
    ValidateLayer(layer, {2, 3});

    const auto inputs = GetPrevLayersPrimitives(layer);
    const auto genericLayer = as<InferenceEngine::GenericLayer*>(layer);
    const auto primitiveName = layer_type_name_ID(layer);

    // The output shape is taken directly from the layer's first output tensor.
    const auto outputDims = genericLayer->outData[0]->getTensorDesc().getDims();
    topology.add(cldnn::embedding_bag(primitiveName,
                                      inputs,
                                      cldnn::embedding_bag::packed_sum,
                                      CldnnTensorFromIEDims(outputDims)));
    AddPrimitiveToProfiler(primitiveName, layer);
}
// Converts an EmbeddingBagOffsetsSum IE layer into a cldnn::embedding_bag primitive.
// Inputs: emb_table, indices, offsets[, default_index][, per_sample_weights].
// A constant scalar "default_index" input is folded into the primitive's
// default_index field and removed from the input list.
void Program::CreateEmbeddingBagOffsetsSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer) {
    ValidateLayer(layer, {3, 4, 5});
    auto inputPrimitives = GetPrevLayersPrimitives(layer);
    auto embeddingBag = as<InferenceEngine::GenericLayer*>(layer);

    int32_t defaultIndex = -1;
    if (inputPrimitives.size() > 3) {
        // "default_index" must be produced by a constant layer holding a single I32 scalar.
        auto defaultIndexInput = layer->insData[3].lock();
        auto defaultIndexInputCreator = defaultIndexInput->getCreatorLayer().lock();
        if (defaultIndexInputCreator->blobs.size() == 1) {
            auto constantBlob = defaultIndexInputCreator->blobs.begin()->second;
            auto defaultIndexPrecision = constantBlob->getTensorDesc().getPrecision();
            if (defaultIndexPrecision == InferenceEngine::Precision::I32) {
                auto data = constantBlob->buffer().as<int32_t*>();
                defaultIndex = data[0];
            } else {
                // Fixed: message previously read "EmbeddingBagOfsetsSum" and lacked a separator.
                THROW_IE_EXCEPTION << layer->name << ": Incorrect EmbeddingBagOffsetsSum default_index precision";
            }
        }
        inputPrimitives.erase(inputPrimitives.begin() + 3);  // Remove "default_index"
    }

    auto layerName = layer_type_name_ID(layer);
    auto embeddingBagPrim = cldnn::embedding_bag(
        layerName,
        inputPrimitives,
        cldnn::embedding_bag::offsets_sum,
        CldnnTensorFromIEDims(embeddingBag->outData[0]->getTensorDesc().getDims()),
        defaultIndex);

    topology.add(embeddingBagPrim);
    AddPrimitiveToProfiler(layerName, layer);
}
// Converts an EmbeddingSegmentsSum IE layer into a cldnn::embedding_bag primitive.
// Inputs: emb_table, indices, segment_ids, num_segments[, default_index][, per_sample_weights].
// "num_segments" is already encoded in the output shape, so its input is dropped;
// a constant scalar "default_index" is folded into the primitive's default_index field.
void Program::CreateEmbeddingSegmentsSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer) {
    ValidateLayer(layer, {4, 5, 6});
    auto inputPrimitives = GetPrevLayersPrimitives(layer);
    auto embeddingBag = as<InferenceEngine::GenericLayer*>(layer);

    inputPrimitives.erase(inputPrimitives.begin() + 3);  // Remove "num_segments"

    int32_t defaultIndex = -1;
    if (inputPrimitives.size() > 3) {
        // "default_index" must be produced by a constant layer holding a single I32 scalar.
        // Note: insData still contains "num_segments", so default_index is at position 4.
        auto defaultIndexInput = layer->insData[4].lock();
        auto defaultIndexInputCreator = defaultIndexInput->getCreatorLayer().lock();
        if (defaultIndexInputCreator->blobs.size() == 1) {
            auto constantBlob = defaultIndexInputCreator->blobs.begin()->second;
            auto defaultIndexPrecision = constantBlob->getTensorDesc().getPrecision();
            if (defaultIndexPrecision == InferenceEngine::Precision::I32) {
                auto data = constantBlob->buffer().as<int32_t*>();
                defaultIndex = data[0];
            } else {
                // Fixed: message previously named the wrong operation ("EmbeddingBagOfsetsSum").
                THROW_IE_EXCEPTION << layer->name << ": Incorrect EmbeddingSegmentsSum default_index precision";
            }
        }
        inputPrimitives.erase(inputPrimitives.begin() + 3);  // Remove "default_index"
    }

    auto layerName = layer_type_name_ID(layer);
    auto embeddingBagPrim = cldnn::embedding_bag(
        layerName,
        inputPrimitives,
        cldnn::embedding_bag::segments_sum,
        CldnnTensorFromIEDims(embeddingBag->outData[0]->getTensorDesc().getDims()),
        defaultIndex);

    topology.add(embeddingBagPrim);
    AddPrimitiveToProfiler(layerName, layer);
}
bool Program::IsValidSplitConvMerge(const InferenceEngine::SplitLayer *splitLayer) const { bool Program::IsValidSplitConvMerge(const InferenceEngine::SplitLayer *splitLayer) const {
if (splitLayer->outData.size() != 2) return false; // split into 2 if (splitLayer->outData.size() != 2) return false; // split into 2

View File

@ -217,6 +217,9 @@ public:
CTCGreedyDecoder, CTCGreedyDecoder,
PriorBoxClustered, PriorBoxClustered,
CumSum, CumSum,
EmbeddingBagPackedSum,
EmbeddingBagOffsetsSum,
EmbeddingSegmentsSum,
NO_TYPE NO_TYPE
}; };
using GenericBlobMap = std::map<cldnn::primitive_id, cldnn::primitive_id>; using GenericBlobMap = std::map<cldnn::primitive_id, cldnn::primitive_id>;
@ -373,6 +376,9 @@ private:
void CreateCTCGreedyDecoderPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer); void CreateCTCGreedyDecoderPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
void CreatePriorBoxClusteredPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer); void CreatePriorBoxClusteredPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
void CreateCumSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer); void CreateCumSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
void CreateEmbeddingBagPackedSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
void CreateEmbeddingBagOffsetsSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
void CreateEmbeddingSegmentsSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
}; };
} // namespace CLDNNPlugin } // namespace CLDNNPlugin

View File

@ -0,0 +1,59 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "single_layer_tests/embedding_bag_offsets_sum.hpp"
#include <vector>
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
// Embedding-table (network) precisions to cover on GPU.
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16
};
// Precisions for the integer "indices"/"offsets" inputs.
const std::vector<InferenceEngine::Precision> indPrecisions = {
InferenceEngine::Precision::I64,
InferenceEngine::Precision::I32
};
// Embedding table shapes: 2-D and 3-D variants.
const std::vector<std::vector<size_t>> emb_table_shape = {
{5, 6},
{10, 35},
{5, 4, 16}
};
// Flat index vectors into the embedding table (values < table dim 0).
const std::vector<std::vector<size_t>> indices = {
{0, 1, 2, 2, 3},
{4, 4, 3, 1, 0},
{1, 2, 1, 2, 1, 2, 1, 2, 1, 2}
};
// Bag start offsets; includes a case with empty bags ({0, 0, 2, 2}).
const std::vector<std::vector<size_t>> offsets = {
{0, 2},
{0, 0, 2, 2},
{2, 4}
};
// Row substituted for empty bags when with_default_index is true.
const std::vector<size_t> default_index = {0, 4};
const std::vector<bool> with_weights = {false, true};
const std::vector<bool> with_default_index = {false, true};
// Cartesian product of all operation-specific parameters.
const auto embBagOffsetSumArgSet = ::testing::Combine(
::testing::ValuesIn(emb_table_shape), ::testing::ValuesIn(indices),
::testing::ValuesIn(offsets), ::testing::ValuesIn(default_index),
::testing::ValuesIn(with_weights), ::testing::ValuesIn(with_default_index));
INSTANTIATE_TEST_CASE_P(
smoke, EmbeddingBagOffsetsSumLayerTest,
::testing::Combine(embBagOffsetSumArgSet,
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(indPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
EmbeddingBagOffsetsSumLayerTest::getTestCaseName);
} // namespace

View File

@ -0,0 +1,49 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "single_layer_tests/embedding_bag_packed_sum.hpp"
#include <vector>
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
// Embedding-table (network) precisions to cover on GPU.
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16,
};
// Precisions for the integer "indices" input.
const std::vector<InferenceEngine::Precision> indPrecisions = {
InferenceEngine::Precision::I64,
InferenceEngine::Precision::I32
};
// Embedding table shapes: 2-D and 3-D variants.
const std::vector<std::vector<size_t>> emb_table_shape = {
{5, 6},
{10, 35},
{5, 4, 16}
};
// Packed indices: one fixed-length row of table indices per bag.
const std::vector<std::vector<std::vector<size_t>>> indices = {
{{0, 1}, {2, 2}, {3, 4}},
{{4, 4, 3}, {1, 0, 2}},
{{1, 2, 1, 2}, {1, 2, 1, 2}}
};
const std::vector<bool> with_weights = {false, true};
// Cartesian product of all operation-specific parameters.
const auto embBagPackedSumArgSet = ::testing::Combine(
::testing::ValuesIn(emb_table_shape), ::testing::ValuesIn(indices),
::testing::ValuesIn(with_weights));
INSTANTIATE_TEST_CASE_P(
smoke, EmbeddingBagPackedSumLayerTest,
::testing::Combine(embBagPackedSumArgSet,
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(indPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
EmbeddingBagPackedSumLayerTest::getTestCaseName);
} // namespace

View File

@ -0,0 +1,54 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "single_layer_tests/embedding_segments_sum.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
// Embedding-table (network) precisions to cover on GPU.
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16
};
// Precisions for the integer "indices"/"segment_ids" inputs.
const std::vector<InferenceEngine::Precision> indPrecisions = {
InferenceEngine::Precision::I64,
InferenceEngine::Precision::I32
};
// Embedding table shapes: 2-D and 3-D variants.
const std::vector<std::vector<size_t>> emb_table_shape = {
{5, 6},
{10, 35},
{5, 4, 16}
};
// Flat index vectors into the embedding table (values < table dim 0).
const std::vector<std::vector<size_t>> indices = {
{0, 1, 2, 2, 3},
{4, 4, 3, 1, 2}
};
// Segment id per index; {0, 0, 2, 2, 4} leaves segments 1 and 3 empty.
const std::vector<std::vector<size_t>> segment_ids = {
{0, 1, 2, 3, 4},
{0, 0, 2, 2, 4}
};
// num_segments = 7 exercises trailing empty segments as well.
const std::vector<size_t> num_segments = {5, 7};
// Row substituted for empty segments when with_default_index is true.
const std::vector<size_t> default_index = {0, 4};
const std::vector<bool> with_weights = {false, true};
const std::vector<bool> with_default_index = {false, true};
// Cartesian product of all operation-specific parameters.
const auto embSegmentsSumArgSet = ::testing::Combine(
::testing::ValuesIn(emb_table_shape), ::testing::ValuesIn(indices),
::testing::ValuesIn(segment_ids), ::testing::ValuesIn(num_segments),
::testing::ValuesIn(default_index), ::testing::ValuesIn(with_weights),
::testing::ValuesIn(with_default_index));
INSTANTIATE_TEST_CASE_P(
smoke, EmbeddingSegmentsSumLayerTest,
::testing::Combine(embSegmentsSumArgSet, ::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(indPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
EmbeddingSegmentsSumLayerTest::getTestCaseName);
} // namespace

View File

@ -0,0 +1,64 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Computes sums of "bags" of embeddings, without instantiating the intermediate embeddings.
/// @details For each index in `indices` this operator gets values from `data` embedding table and sums all values belonging to each bag.
struct embedding_bag : public primitive_base<embedding_bag> {
CLDNN_DECLARE_PRIMITIVE(embedding_bag)
/// @brief Select type of embedding_bag operation
enum embedding_bag_type {
packed_sum,   ///< bags come as fixed-size rows of a dense indices matrix
offsets_sum,  ///< bags are delimited by an extra "offsets" input
segments_sum  ///< bags are selected by an extra "segment_ids" input
};
/// @brief Constructs embedding_bag primitive.
/// @param id This primitive id.
/// @param inputs Vector with different inputs.
/// @param type Which EmbeddingBag flavor the inputs follow (packed/offsets/segments).
/// @param output_shape Tensor with shape of output layout
/// @param default_index default index in embedding table to fill empty "bags";
///        -1 (the default) disables the substitution.
/// @param output_padding Optional padding applied to the output layout.
embedding_bag(const primitive_id& id,
const std::vector<primitive_id>& inputs,
const embedding_bag_type& type,
const tensor& output_shape,
const int32_t default_index = -1,
const padding& output_padding = padding())
: primitive_base(id, inputs, output_padding), type(type), output_shape(output_shape), default_index(default_index) {}
/// @brief Type of EmbeddingBag operation
embedding_bag_type type;
/// @brief Shape of output layout
tensor output_shape;
/// @brief Default index
int32_t default_index;
};
/// @}
/// @}
/// @}
} // namespace cldnn

View File

@ -82,7 +82,8 @@ enum class KernelType {
SPACE_TO_DEPTH, SPACE_TO_DEPTH,
GRN, GRN,
CTC_GREEDY_DECODER, CTC_GREEDY_DECODER,
CUM_SUM CUM_SUM,
EMBEDDING_BAG
}; };
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -532,4 +533,13 @@ enum class CumSumAxis {
FEATURE, FEATURE,
BATCH BATCH
}; };
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// EmbeddingBagType
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
enum class EmbeddingBagType {
PACKED_SUM,
OFFSETS_SUM,
SEGMENTS_SUM
};
} // namespace kernel_selector } // namespace kernel_selector

View File

@ -0,0 +1,140 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "embedding_bag_kernel_ref.h"
#include "kernel_selector_utils.h"
#include <string>
#include <vector>
namespace kernel_selector {
// Builds the JIT defines that select which kernel variant in the .cl file gets compiled.
JitConstants EmbeddingBagKernelRef::GetJitConstants(const embedding_bag_params& params) const {
    auto jit = MakeBaseParamsJitConstants(params);

    // Exactly one of PACKED_SUM / OFFSETS_SUM / SEGMENTS_SUM is defined, matching
    // the #ifdef-guarded kernel bodies in embedding_bag_ref.cl.
    if (params.type == EmbeddingBagType::PACKED_SUM) {
        jit.AddConstant(MakeJitConstant("PACKED_SUM", 1));
    } else if (params.type == EmbeddingBagType::OFFSETS_SUM) {
        jit.AddConstant(MakeJitConstant("OFFSETS_SUM", 1));
    } else if (params.type == EmbeddingBagType::SEGMENTS_SUM) {
        jit.AddConstant(MakeJitConstant("SEGMENTS_SUM", 1));
    }

    // A non-negative default_index turns on the empty-bag fallback path in the kernel.
    if (params.default_index > -1) {
        jit.AddConstant(MakeJitConstant("DEFAULT_INDEX", params.default_index));
    }

    return jit;
}
// Computes the dispatch geometry: one work-item per output element,
// laid out as (batch, feature, y*x flattened).
CommonDispatchData EmbeddingBagKernelRef::SetDefault(const embedding_bag_params& params) const {
    CommonDispatchData dispatchData;

    const std::vector<size_t> gws = { params.output.Batch().v,
                                      params.output.Feature().v,
                                      params.output.Y().v * params.output.X().v };
    const auto lws = GetOptimalLocalWorkGroupSizes(gws, params.engineInfo);

    dispatchData.gws0 = gws[0];
    dispatchData.gws1 = gws[1];
    dispatchData.gws2 = gws[2];
    dispatchData.lws0 = lws[0];
    dispatchData.lws1 = lws[1];
    dispatchData.lws2 = lws[2];

    return dispatchData;
}
// Produces a single reference kernel for the given embedding_bag parameters,
// or an empty list if the configuration is unsupported.
KernelsData EmbeddingBagKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
    // Validate first so no KernelData is allocated for unsupported configurations
    // (the original allocated before validating).
    if (!Validate(params, options)) {
        return {};
    }

    KernelData kd = KernelData::Default<embedding_bag_params>(params);
    embedding_bag_params& newParams = *static_cast<embedding_bag_params*>(kd.params.get());

    auto runInfo = SetDefault(newParams);
    auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
    auto cldnn_jit = GetJitConstants(newParams);
    std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);

    auto& kernel = kd.kernels[0];
    FillCLKernelData(kernel,
                     runInfo,
                     params.engineInfo,
                     kernelName,
                     jit,
                     entry_point,
                     "",
                     false,
                     false,
                     (uint32_t)newParams.inputs.size());  // input count varies (2..4) by op flavor

    kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;

    return { kd };
}
// Declares which data types / layouts this reference kernel supports.
ParamsKey EmbeddingBagKernelRef::GetSupportedKey() const {
ParamsKey k;
// Embedding table is fp; index-like inputs may be 32/64-bit integers.
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableInputDataType(Datatype::INT32);
k.EnableInputDataType(Datatype::INT64);
k.EnableInputDataType(Datatype::UINT32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
// NOTE(review): "bfxy" looks like a typo for "bfyx", and the call is redundant
// anyway given EnableAllInputLayout() just below — confirm before removing.
k.EnableInputLayout(DataLayout::bfxy);
k.EnableAllInputLayout();
k.EnableAllOutputLayout();
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBatching();
k.EnableDifferentTypes();
return k;
}
// Checks that the parameters describe a supported embedding_bag configuration.
// Returns false for a mismatched kernel type or non-32-bit index inputs.
bool EmbeddingBagKernelRef::Validate(const Params& p, const optional_params& o) const {
    // Both params and optional params must target the EMBEDDING_BAG kernel type.
    if (p.GetType() != KernelType::EMBEDDING_BAG ||
        o.GetType() != KernelType::EMBEDDING_BAG) {
        return false;
    }

    const embedding_bag_params& params = static_cast<const embedding_bag_params&>(p);

    // The kernel addresses the table with 32-bit indices only.
    auto checkIntType = [](Datatype dt) {
        return dt == Datatype::INT32 || dt == Datatype::UINT32;
    };

    // Guard against short input lists before indexing (robustness; callers
    // normally provide at least {table, indices}).
    if (params.inputs.size() < 2 || !checkIntType(params.inputs[1].GetDType()))
        return false;

    // offsets / segment_ids (input 2) must also be 32-bit integers.
    if (params.type == EmbeddingBagType::OFFSETS_SUM || params.type == EmbeddingBagType::SEGMENTS_SUM) {
        if (params.inputs.size() < 3 || !checkIntType(params.inputs[2].GetDType()))
            return false;
    }

    return true;
}
} // namespace kernel_selector

View File

@ -0,0 +1,58 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "common_kernel_base.h"
#include "kernel_selector_params.h"
namespace kernel_selector {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// embedding_bag_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Parameters of the embedding_bag kernel.
struct embedding_bag_params : public base_params {
embedding_bag_params() : base_params(KernelType::EMBEDDING_BAG), type(EmbeddingBagType::PACKED_SUM), default_index(-1) {}
// Which EmbeddingBag flavor to compile (packed / offsets / segments).
EmbeddingBagType type;
// Table row used for empty bags; -1 disables the fallback.
int32_t default_index;
virtual ParamsKey GetParamsKey() const { return base_params::GetParamsKey(); }
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// embedding_bag_optional_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Optional parameters of the embedding_bag kernel (none beyond the base set).
struct embedding_bag_optional_params : optional_params {
embedding_bag_optional_params() : optional_params(KernelType::EMBEDDING_BAG) {}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// EmbeddingBagKernelRef
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Reference (non-optimized) OpenCL implementation of the embedding_bag primitive.
class EmbeddingBagKernelRef : public common_kernel_base {
public:
EmbeddingBagKernelRef() : common_kernel_base("embedding_bag_ref") {}
virtual ~EmbeddingBagKernelRef() = default;
protected:
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
// Emits the *_SUM / DEFAULT_INDEX JIT defines selecting the kernel variant.
virtual JitConstants GetJitConstants(const embedding_bag_params& params) const;
// One work-item per output element.
virtual CommonDispatchData SetDefault(const embedding_bag_params& params) const;
virtual bool Validate(const Params&, const optional_params&) const override;
};
} // namespace kernel_selector

View File

@ -0,0 +1,29 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "embedding_bag_kernel_selector.h"
#include "embedding_bag_kernel_ref.h"
namespace kernel_selector {
// Registers all available embedding_bag kernel implementations
// (currently only the reference kernel).
embedding_bag_kernel_selector::embedding_bag_kernel_selector() {
Attach<EmbeddingBagKernelRef>();
}
// Picks the best registered kernel for the given parameters.
KernelsData embedding_bag_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
return GetNaiveBestKernel(params, options, KernelType::EMBEDDING_BAG);
}
} // namespace kernel_selector

View File

@ -0,0 +1,35 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "kernel_selector.h"
namespace kernel_selector {
// Selector that chooses an embedding_bag kernel implementation.
class embedding_bag_kernel_selector : public kernel_selector_base {
public:
// Meyers-singleton accessor; selector state is shared process-wide.
static embedding_bag_kernel_selector& Instance() {
static embedding_bag_kernel_selector instance_;
return instance_;
}
embedding_bag_kernel_selector();
virtual ~embedding_bag_kernel_selector() {}
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};
} // namespace kernel_selector

View File

@ -0,0 +1,148 @@
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "include/include_all.cl"
#ifdef PACKED_SUM
// Packed variant: indices is a dense [batch, bag_size] matrix; every bag sums
// the same number of table rows. One work-item per output element:
// gid0 = batch (bag), gid1 = feature, gid2 = y*x flattened.
KERNEL(embedding_bag_ref)(
const __global INPUT0_TYPE* emb_table,
const __global INPUT1_TYPE* indices,
#ifdef INPUT2_TYPE
const __global INPUT2_TYPE* weights,
#endif
__global OUTPUT_TYPE* output)
{
const uint batch = get_global_id(0);
const uint emb_dim1 = get_global_id(1);
const uint emb_dim2 = (uint)get_global_id(2) / OUTPUT_SIZE_X;
const uint emb_dim3 = (uint)get_global_id(2) % OUTPUT_SIZE_X;
OUTPUT_TYPE res = OUTPUT_VAL_ZERO;
// Accumulate every table row referenced by this bag's row of `indices`.
for (int i = 0; i < INPUT1_FEATURE_NUM; ++i)
{
uint indices_index = INPUT1_GET_INDEX(batch, i, 0, 0);
uint emb_index = INPUT0_GET_INDEX(indices[indices_index], emb_dim1, emb_dim2, emb_dim3);
OUTPUT_TYPE val = emb_table[emb_index];
#ifdef INPUT2_TYPE
// Optional per-sample weights share the indices' [batch, bag_size] shape.
{
uint weight_index = INPUT2_GET_INDEX(batch, i, 0, 0);
val *= weights[weight_index];
}
#endif
res += val;
}
uint out_ind = OUTPUT_GET_INDEX(batch, emb_dim1, emb_dim2, emb_dim3);
output[out_ind] = ACTIVATION(TO_OUTPUT_TYPE(res), ACTIVATION_PARAMS);
}
#endif // PACKED_SUM
#ifdef OFFSETS_SUM
// Offsets variant: bag `b` covers indices[offsets[b] .. offsets[b+1]); the last
// bag runs to the end of `indices`. One work-item per output element.
KERNEL(embedding_bag_ref)(
const __global INPUT0_TYPE* emb_table,
const __global INPUT1_TYPE* indices,
const __global INPUT2_TYPE* offsets,
#ifdef INPUT3_TYPE
const __global INPUT3_TYPE* weights,
#endif
__global OUTPUT_TYPE* output)
{
const uint batch = get_global_id(0);
const uint emb_dim1 = get_global_id(1);
const uint emb_dim2 = (uint)get_global_id(2) / OUTPUT_SIZE_X;
const uint emb_dim3 = (uint)get_global_id(2) % OUTPUT_SIZE_X;
uint offsets_ind = INPUT2_OFFSET + batch;
uint start_indices = INPUT1_OFFSET + offsets[offsets_ind];
offsets_ind = INPUT2_OFFSET + batch + 1;
uint end_indices = (batch < OUTPUT_BATCH_NUM - 1) ?
INPUT1_OFFSET + offsets[offsets_ind] :
INPUT1_LENGTH;
OUTPUT_TYPE res = OUTPUT_VAL_ZERO;
for (int i = start_indices; i < end_indices; ++i)
{
// NOTE(review): `i` already starts at start_indices, which includes
// INPUT1_OFFSET, so adding INPUT1_OFFSET again looks double-counted
// unless INPUT1_OFFSET == 0 — verify for padded/offset input layouts.
uint indices_index = INPUT1_OFFSET + i;
uint emb_index = INPUT0_GET_INDEX(indices[indices_index], emb_dim1, emb_dim2, emb_dim3);
OUTPUT_TYPE val = emb_table[emb_index];
#ifdef INPUT3_TYPE
// Per-sample weights are indexed in step with `indices`.
{
uint weight_index = INPUT3_OFFSET + i;
val *= weights[weight_index];
}
#endif
res += val;
}
#ifdef DEFAULT_INDEX
// Empty bag: substitute the DEFAULT_INDEX row of the table (unweighted).
if (start_indices == end_indices) {
{
uint emb_index = INPUT0_GET_INDEX(DEFAULT_INDEX, emb_dim1, emb_dim2, emb_dim3);
res = emb_table[emb_index];
}
}
#endif
uint out_ind = OUTPUT_GET_INDEX(batch, emb_dim1, emb_dim2, emb_dim3);
output[out_ind] = ACTIVATION(TO_OUTPUT_TYPE(res), ACTIVATION_PARAMS);
}
#endif // OFFSETS_SUM
#ifdef SEGMENTS_SUM
// Segments variant: indices[i] belongs to bag segment_ids[i]. The scan relies
// on segment_ids being sorted ascending (the loop breaks once id > batch).
// One work-item per output element.
KERNEL(embedding_bag_ref)(
const __global INPUT0_TYPE* emb_table,
const __global INPUT1_TYPE* indices,
const __global INPUT2_TYPE* segment_ids,
#ifdef INPUT3_TYPE
const __global INPUT3_TYPE* weights,
#endif
__global OUTPUT_TYPE* output)
{
const uint batch = get_global_id(0);
const uint emb_dim1 = get_global_id(1);
const uint emb_dim2 = (uint)get_global_id(2) / OUTPUT_SIZE_X;
const uint emb_dim3 = (uint)get_global_id(2) % OUTPUT_SIZE_X;
OUTPUT_TYPE res = OUTPUT_VAL_ZERO;
// `found` tracks whether this segment received any contribution.
bool found = false;
for (int i = 0; i < INPUT2_LENGTH; ++i) {
uint id = segment_ids[INPUT2_OFFSET + i];
if (id > batch)
break;
if (id == batch) {
found = true;
uint index = indices[INPUT1_OFFSET + i];
uint emb_index = INPUT0_GET_INDEX(index, emb_dim1, emb_dim2, emb_dim3);
OUTPUT_TYPE val = emb_table[emb_index];
#ifdef INPUT3_TYPE
// Per-sample weights are indexed in step with `indices`.
{
uint weight_index = INPUT3_OFFSET + i;
val *= weights[weight_index];
}
#endif
res += val;
}
}
#ifdef DEFAULT_INDEX
// Empty segment: substitute the DEFAULT_INDEX row of the table (unweighted).
if (!found) {
uint emb_index = INPUT0_GET_INDEX(DEFAULT_INDEX, emb_dim1, emb_dim2, emb_dim3);
res = emb_table[emb_index];
}
#endif
uint out_ind = OUTPUT_GET_INDEX(batch, emb_dim1, emb_dim2, emb_dim3);
output[out_ind] = ACTIVATION(TO_OUTPUT_TYPE(res), ACTIVATION_PARAMS);
}
#endif // SEGMENTS_SUM

View File

@ -0,0 +1,70 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "embedding_bag_inst.h"
#include "primitive_type_base.h"
#include "error_handler.h"
#include "json_object.h"
#include <string>
namespace cldnn {
// Returns the singleton type descriptor for the embedding_bag primitive.
primitive_type_id embedding_bag::type_id() {
static primitive_type_base<embedding_bag> instance;
return &instance;
}
// Output layout keeps the data type and format of the embedding table (input 0);
// the shape comes straight from the primitive descriptor.
layout embedding_bag_inst::calc_output_layout(embedding_bag_node const& node) {
    auto prim = node.get_primitive();
    auto table_layout = node.input(0).get_output_layout();
    return layout(table_layout.data_type, table_layout.format, prim->output_shape);
}
// Produces a human-readable JSON description of the node for logs/dumps.
std::string embedding_bag_inst::to_string(embedding_bag_node const& node) {
    auto prim = node.get_primitive();
    auto node_info = node.desc_to_json();

    json_composite embedding_bag_info;
    embedding_bag_info.add("input id", node.input().id());
    switch (prim->type) {
    case embedding_bag::packed_sum:
        embedding_bag_info.add("embedding bag type", "PackedSum");
        break;
    case embedding_bag::offsets_sum:
        embedding_bag_info.add("embedding bag type", "OffsetsSum");
        break;
    case embedding_bag::segments_sum:
        embedding_bag_info.add("embedding bag type", "SegmentsSum");
        break;
    }
    node_info->add("embedding_bag info", embedding_bag_info);

    std::stringstream description;
    node_info->dump(description);
    return description.str();
}
// Instance constructor: no extra state beyond the generic primitive instance.
embedding_bag_inst::typed_primitive_inst(network_impl& network, embedding_bag_node const& node)
: parent(network, node) {}
} // namespace cldnn

View File

@ -0,0 +1,85 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "embedding_bag_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "kernel_selector_helper.h"
#include "embedding_bag/embedding_bag_kernel_selector.h"
#include "embedding_bag/embedding_bag_kernel_ref.h"
#include "error_handler.h"
#include "data_inst.h"
using namespace cldnn;
namespace cldnn {
namespace gpu {
// GPU implementation wrapper: translates the cldnn embedding_bag primitive
// into kernel_selector parameters and picks the best available kernel.
struct embedding_bag_gpu : typed_primitive_gpu_impl<embedding_bag> {
using parent = typed_primitive_gpu_impl<embedding_bag>;
using parent::parent;
public:
static primitive_impl* create(const embedding_bag_node& arg) {
auto embedding_bag_params = get_default_params<kernel_selector::embedding_bag_params>(arg);
auto embedding_bag_optional_params =
get_default_optional_params<kernel_selector::embedding_bag_optional_params>(arg.get_program());
// Map the primitive's operation kind onto the kernel_selector enum.
switch (arg.get_primitive()->type) {
case embedding_bag::packed_sum:
embedding_bag_params.type = kernel_selector::EmbeddingBagType::PACKED_SUM;
break;
case embedding_bag::offsets_sum:
embedding_bag_params.type = kernel_selector::EmbeddingBagType::OFFSETS_SUM;
break;
case embedding_bag::segments_sum:
embedding_bag_params.type = kernel_selector::EmbeddingBagType::SEGMENTS_SUM;
break;
default:
CLDNN_ERROR_MESSAGE(arg.id(), "Unknown EmbeddingBag type");
break;
}
// Append the remaining inputs (indices/offsets/weights); input 0 is presumably
// already covered by get_default_params — TODO confirm against its definition.
for (size_t i = 1; i < arg.inputs_count(); i++) {
embedding_bag_params.inputs.push_back(convert_data_tensor(arg.input(i).get_output_layout()));
}
embedding_bag_params.default_index = arg.get_primitive()->default_index;
auto& kernel_selector = kernel_selector::embedding_bag_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(embedding_bag_params, embedding_bag_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
// Ownership of the raw pointer passes to the caller (implementation_map convention).
auto embedding_bag = new embedding_bag_gpu(arg, best_kernels[0]);
return embedding_bag;
}
};
namespace detail {
// Registers the OCL embedding_bag implementation for both supported
// floating-point precisions (f32 first, then f16 — same order as before).
attach_embedding_bag_gpu::attach_embedding_bag_gpu() {
    auto impl = embedding_bag_gpu::create;
    for (auto dt : {data_types::f32, data_types::f16}) {
        implementation_map<embedding_bag>::add(std::make_tuple(engine_types::ocl, dt, format::bfyx), impl);
    }
}
} // namespace detail
} // namespace gpu
} // namespace cldnn

View File

@ -98,6 +98,7 @@ void register_implementations_gpu() {
REGISTER_GPU(grn); REGISTER_GPU(grn);
REGISTER_GPU(ctc_greedy_decoder); REGISTER_GPU(ctc_greedy_decoder);
REGISTER_GPU(cum_sum); REGISTER_GPU(cum_sum);
REGISTER_GPU(embedding_bag);
} }
} // namespace gpu } // namespace gpu

View File

@ -175,6 +175,7 @@ REGISTER_GPU(non_max_suppression);
REGISTER_GPU(grn); REGISTER_GPU(grn);
REGISTER_GPU(ctc_greedy_decoder); REGISTER_GPU(ctc_greedy_decoder);
REGISTER_GPU(cum_sum); REGISTER_GPU(cum_sum);
REGISTER_GPU(embedding_bag);
#undef REGISTER_GPU #undef REGISTER_GPU

View File

@ -50,6 +50,7 @@
#include "shuffle_channels_inst.h" #include "shuffle_channels_inst.h"
#include "strided_slice_inst.h" #include "strided_slice_inst.h"
#include "cum_sum_inst.h" #include "cum_sum_inst.h"
#include "embedding_bag_inst.h"
#include <vector> #include <vector>
#include <list> #include <list>
#include <memory> #include <memory>
@ -200,7 +201,8 @@ void prepare_primitive_fusing::fuse_activations(program_impl &p) {
!input.is_type<softmax>() && !input.is_type<resample>() && !input.is_type<mvn>() && !input.is_type<softmax>() && !input.is_type<resample>() && !input.is_type<mvn>() &&
!input.is_type<depth_to_space>() && !input.is_type<gather>() && !input.is_type<reverse_sequence>() && !input.is_type<depth_to_space>() && !input.is_type<gather>() && !input.is_type<reverse_sequence>() &&
!input.is_type<shuffle_channels>() && !input.is_type<strided_slice>() && !input.is_type<cum_sum>() && !input.is_type<shuffle_channels>() && !input.is_type<strided_slice>() && !input.is_type<cum_sum>() &&
!input.is_type<fused_conv_eltwise>() && !input.is_type<activation>())) !input.is_type<embedding_bag>() && !input.is_type<fused_conv_eltwise>() &&
!input.is_type<activation>()))
return; return;
if (input.is_type<eltwise>()) { if (input.is_type<eltwise>()) {

View File

@ -0,0 +1,49 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "api/embedding_bag.hpp"
#include "primitive_inst.h"
#include <string>
namespace cldnn {
// Program-node specialization for embedding_bag: exposes typed access to the
// node's dependencies (input 0 is the embedding table; the remaining inputs
// are indices/offsets/segment ids and, optionally, per-sample weights).
template <>
struct typed_program_node<embedding_bag> : public typed_program_node_base<embedding_bag> {
using parent = typed_program_node_base<embedding_bag>;
public:
using parent::parent;
// Returns the dependency node at the given input slot (defaults to the first input).
program_node& input(size_t index = 0) const { return get_dependency(index); }
// Total number of inputs wired to this node.
size_t inputs_count() const { return get_dependencies().size(); }
};
// Convenience alias used throughout the GPU plugin.
using embedding_bag_node = typed_program_node<embedding_bag>;
// Primitive-instance specialization for embedding_bag; the static helpers are
// defined in the corresponding .cpp (output layout inference and debug dump).
template <>
class typed_primitive_inst<embedding_bag> : public typed_primitive_inst_base<embedding_bag> {
using parent = typed_primitive_inst_base<embedding_bag>;
public:
// Computes the output layout for the given node (implementation elsewhere).
static layout calc_output_layout(embedding_bag_node const& node);
// Human-readable description of the node for logging/debugging.
static std::string to_string(embedding_bag_node const& node);
typed_primitive_inst(network_impl& network, embedding_bag_node const& desc);
};
// Convenience alias used throughout the GPU plugin.
using embedding_bag_inst = typed_primitive_inst<embedding_bag>;
} // namespace cldnn

File diff suppressed because it is too large Load Diff