[IE CLDNN] Added space_to_batch operation (#984)

This commit is contained in:
Roman Lyamin
2020-06-24 18:30:24 +03:00
committed by GitHub
parent cec12131e7
commit bc132056f9
20 changed files with 1275 additions and 5 deletions

View File

@@ -82,7 +82,8 @@ InferenceEngine::ICNNNetwork::Ptr clDNNEngine::CloneNetwork(const InferenceEngin
return std::dynamic_pointer_cast<const ::ngraph::opset2::Gelu>(node) ||
std::dynamic_pointer_cast<const ::ngraph::opset3::ShuffleChannels>(node) ||
std::dynamic_pointer_cast<const ::ngraph::opset2::BatchToSpace>(node);
std::dynamic_pointer_cast<const ::ngraph::opset2::BatchToSpace>(node) ||
std::dynamic_pointer_cast<const ::ngraph::opset2::SpaceToBatch>(node);
};
auto nGraphFunc = clonedNetwork->getFunction();
// Disable shape inference (WA for generic operations)

View File

@@ -49,6 +49,7 @@
#include <api/depth_to_space.hpp>
#include <api/space_to_depth.hpp>
#include <api/batch_to_space.hpp>
#include <api/space_to_batch.hpp>
#include <api/shuffle_channels.hpp>
#include <api/strided_slice.hpp>
#include <api/reverse_sequence.hpp>
@@ -540,6 +541,7 @@ Program::LayerType Program::LayerTypeFromStr(const std::string &str) {
{ "DepthToSpace" , DepthToSpace },
{ "SpaceToDepth" , SpaceToDepth },
{ "BatchToSpace", BatchToSpace },
{ "SpaceToBatch" , SpaceToBatch },
{ "ShuffleChannels" , ShuffleChannels },
{ "StridedSlice" , StridedSlice },
{ "ReverseSequence" , ReverseSequence },
@@ -1243,6 +1245,8 @@ void Program::CreateSingleLayerPrimitive(cldnn::topology& topology, InferenceEng
break;
case BatchToSpace: CreateBatchToSpacePrimitive(topology, layer);
break;
case SpaceToBatch: CreateSpaceToBatchPrimitive(topology, layer);
break;
case ShuffleChannels: CreateShuffleChannelsPrimitive(topology, layer);
break;
case StridedSlice: CreateStridedSlicePrimitive(topology, layer);
@@ -3914,6 +3918,62 @@ void Program::CreateBatchToSpacePrimitive(cldnn::topology& topology, InferenceEn
AddPrimitiveToProfiler(batchToSpaceName, layer);
}
void Program::CreateSpaceToBatchPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
ValidateLayer(layer, 4);
auto inputPrimitives = GetPrevLayersPrimitives(layer);
auto spaceToBatch = as<InferenceEngine::GenericLayer*> (layer);
auto rank = spaceToBatch->input().get()->getTensorDesc().getDims().size();
auto format = FormatFromLayout(spaceToBatch->input()->getLayout());
std::vector<cldnn::tensor> inputs;
inputs.reserve(3);
for (size_t i = 1; i < 4; ++i) {
auto defaultIndexInput = layer->insData[i].lock();
auto defaultIndexInputCreator = defaultIndexInput->getCreatorLayer().lock();
if (defaultIndexInputCreator->blobs.size() == 1) {
auto constantBlob = defaultIndexInputCreator->blobs.begin()->second;
auto defaultIndexPrecision = constantBlob->getTensorDesc().getPrecision();
std::vector<int32_t> sizes;
sizes.reserve(rank);
int32_t default_size = i == 1 ? 1 : 0;
switch (defaultIndexPrecision) {
case InferenceEngine::Precision::I32: {
auto data = constantBlob->buffer().as<int32_t*>();
sizes = std::vector<int32_t>(data, data + rank);
break;
}
case InferenceEngine::Precision::I64: {
auto data = constantBlob->buffer().as<int64_t*>();
std::vector<int64_t> sizes_i64 = std::vector<int64_t>(data, data + rank);
for (size_t j = 0; j < sizes_i64.size(); ++j)
sizes.emplace_back(static_cast<int32_t>(sizes_i64[j]));
break;
}
default: {
THROW_IE_EXCEPTION << layer->name << "Incorrect SpaceToBatch precision";
break;
}
}
inputs.emplace_back(format, sizes, default_size);
}
}
auto out_size = CldnnTensorFromIEDims(spaceToBatch->outData[0]->getTensorDesc().getDims());
std::string spaceToBatchName = layer_type_name_ID(layer);
auto spaceToBatchPrim = cldnn::space_to_batch(
spaceToBatchName,
inputPrimitives[0], //input
inputs[0], //block_shape
inputs[1], //pads_begin
inputs[2], //pads_end
out_size);
topology.add(spaceToBatchPrim);
AddPrimitiveToProfiler(spaceToBatchName, layer);
}
void Program::CreateShuffleChannelsPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
ValidateLayer(layer, 1);

View File

@@ -177,6 +177,7 @@ public:
DepthToSpace,
SpaceToDepth,
BatchToSpace,
SpaceToBatch,
ShuffleChannels,
StridedSlice,
Broadcast,
@@ -358,6 +359,7 @@ private:
void CreateDepthToSpacePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateSpaceToDepthPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateBatchToSpacePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateSpaceToBatchPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateShuffleChannelsPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateStridedSlicePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateBroadcastPrimitive(cldnn::topology &topology, InferenceEngine::CNNLayerPtr &layer);

View File

@@ -0,0 +1,29 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "single_layer_tests/space_to_batch.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
spaceToBatchParamsTuple stb_only_test_cases[] = {
spaceToBatchParamsTuple({1, 1, 2, 2}, {0, 0, 0, 0}, {0, 0, 0, 0}, {1, 1, 2, 2},
InferenceEngine::Precision::FP32, CommonTestUtils::DEVICE_GPU),
spaceToBatchParamsTuple({1, 1, 2, 2}, {0, 0, 0, 0}, {0, 0, 0, 0}, {1, 3, 2, 2},
InferenceEngine::Precision::FP32, CommonTestUtils::DEVICE_GPU),
spaceToBatchParamsTuple({1, 1, 2, 2}, {0, 0, 0, 0}, {0, 0, 0, 0}, {1, 1, 4, 4},
InferenceEngine::Precision::FP32, CommonTestUtils::DEVICE_GPU),
spaceToBatchParamsTuple({1, 1, 2, 2}, {0, 0, 0, 2}, {0, 0, 0, 0}, {2, 1, 2, 4},
InferenceEngine::Precision::FP32, CommonTestUtils::DEVICE_GPU),
spaceToBatchParamsTuple({1, 1, 3, 2, 2}, {0, 0, 1, 0, 3}, {0, 0, 2, 0, 0}, {1, 1, 3, 2, 1},
InferenceEngine::Precision::FP32, CommonTestUtils::DEVICE_GPU),
};
INSTANTIATE_TEST_CASE_P(smoke_CLDNN, SpaceToBatchLayerTest, ::testing::ValuesIn(stb_only_test_cases),
SpaceToBatchLayerTest::getTestCaseName);
} // namespace

View File

@@ -0,0 +1,86 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief SpaceToBatch operation divides "spatial" dimensions [1, ..., N - 1], N ∈ {4,5,6} of the data input
/// into a grid of blocks of shape block_shape, and interleaves these blocks with the batch dimension (0) such that in the output,
/// the spatial dimensions [1, ..., N - 1], N ∈ {4,5,6} correspond to the position within the grid,
/// and the batch dimension combines both the position within a spatial block and the original batch position.
/// Prior to division into blocks, the spatial dimensions of the input are optionally zero padded according to pads_begin and pads_end.
/// @details The SpaceToBatch operation is similar to the TensorFlow* operation SpaceToBatchND (https://www.tensorflow.org/api_docs/python/tf/space_to_batch_nd)
/// There are 4 inputs of this operation:
/// 1) data - input N-D tensor [batch, D_1, D_2 ... D_{N-1}], N ∈ {4,5,6}. Required.
/// 2) block_shape - input 1-D tensor with shape [N], N ∈ {4,5,6}. Consists of block_sizes each of which specifies the size of the value block to be moved.
/// All values must be >= 1 and required. block_shape[0] is expected to be 1.
/// 3) pads_begin - input 1-D tensor with shape [N], N ∈ {4,5,6}. Specifies the padding for the beginning along each axis of data input.
/// All values must be non-negative and required. pads_begin[0] is expected to be 0.
/// 4) pads_end - input 1-D tensor with shape [N], N ∈ {4,5,6}. Specifies the padding for the ending along each axis of data input.
/// All values must be non-negative and required. pads_end[0] is expected to be 0.
/// 3-4 inputs required that block_shape[i] divides data_shape[i] + pads_begin[i] + pads_end[i]
///
/// The operation is equivalent to the following transformation of the input tensor data of shape [batch, D_1, D_2 ... D_{N - 1}], N ∈ {4,5,6}
/// and block_shape, pads_begin, pads_end of shapes [N] to Y output tensor.
/// Zero-pad the start and end of dimensions [D_0, ..., D_{N - 1}] of the input according to `pads_begin` and `pads_end`
///
/// x' = reshape(x, [batch, (D_1 + P_1) / B_1, B_1, (D_2 + P_2) / B_2, B_2, ..., (D_{N - 1} + P_{N - 1}) / B_{N - 1}, B_{N - 1}]), where B_i = block_shape[i]
///
/// x'' = transpose(x', [2, 4, ..., (N - 1) + (N - 1), 0, 1, 3, ..., N + (N - 1)])
///
/// y = reshape(x'', [batch * B_1 * ... * B_{N - 1}, (D_1 + P_1) / B_1, (D_2 + P_2) / B_2, ... , (D_{N - 1} + P_{N - 1}) / B_{N - 1}])
struct space_to_batch : public primitive_base<space_to_batch> {
CLDNN_DECLARE_PRIMITIVE(space_to_batch)
/// @brief Constructs space_to_batch primitive.
/// @param id This primitive id.
/// @param input Input data primitive id.
/// @param block_shape Array of block sizes.
/// @param pads_begin Amount to pad for the beginning along each axis of data input.
/// @param pads_end Amount to pad for the ending along each axis of data input.
/// @param out_size Size of output tensor.
space_to_batch(const primitive_id& id,
const primitive_id& input,
const tensor& block_shape,
const tensor& pads_begin,
const tensor& pads_end,
const tensor& out_size,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
block_shape(block_shape),
pads_begin(pads_begin),
pads_end(pads_end),
out_size(out_size) {}
tensor block_shape;
tensor pads_begin;
tensor pads_end;
tensor out_size;
};
/// @}
/// @}
/// @}
} // namespace cldnn

View File

@@ -81,6 +81,7 @@ enum class KernelType {
REDUCE,
GATHER_TREE,
SPACE_TO_DEPTH,
SPACE_TO_BATCH,
GRN,
CTC_GREEDY_DECODER,
CUM_SUM,

View File

@@ -0,0 +1,102 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "space_to_batch_kernel_base.h"
#include "kernel_selector_utils.h"
#include <string>
namespace kernel_selector {
bool SpaceToBatchKernelBase::Validate(const Params& p, const optional_params& o) const {
if (p.GetType() != KernelType::SPACE_TO_BATCH ||
o.GetType() != KernelType::SPACE_TO_BATCH) {
return false;
}
return true;
}
CommonDispatchData SpaceToBatchKernelBase::SetDefault(const space_to_batch_params& params, const optional_params&) const {
CommonDispatchData runInfo;
std::vector<size_t> global = { params.output.Batch().v,
params.output.Feature().v,
params.output.W().v * params.output.Z().v * params.output.Y().v * params.output.X().v };
auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
runInfo.gws0 = global[0];
runInfo.gws1 = global[1];
runInfo.gws2 = global[2];
runInfo.lws0 = local[0];
runInfo.lws1 = local[1];
runInfo.lws2 = local[2];
return runInfo;
}
JitConstants SpaceToBatchKernelBase::GetJitConstants(const space_to_batch_params& params) const {
JitConstants jit = MakeBaseParamsJitConstants(params);
auto makeJitConstForParam = [](JitConstants& jit, const std::string name, const DimTensor<uint32_t>& args, const size_t default_value) {
jit.AddConstant(MakeJitConstant(name + "_SIZES", args));
jit.AddConstant(MakeJitConstant(name + "_BATCH", args.b));
jit.AddConstant(MakeJitConstant(name + "_FEATURE", args.f));
jit.AddConstant(MakeJitConstant(name + "_Y", args.y));
jit.AddConstant(MakeJitConstant(name + "_X", args.x));
if (args.w != 0) {
jit.AddConstant(MakeJitConstant(name + "_W", args.w));
jit.AddConstant(MakeJitConstant(name + "_Z", args.z));
} else if(args.z != 0) {
jit.AddConstant(MakeJitConstant(name + "_W", default_value));
jit.AddConstant(MakeJitConstant(name + "_Z", args.z));
} else {
jit.AddConstant(MakeJitConstant(name + "_W", default_value));
jit.AddConstant(MakeJitConstant(name + "_Z", default_value));
}
};
makeJitConstForParam(jit, "BLOCK_SHAPE", params.block_shape, 1);
makeJitConstForParam(jit, "PADS_BEGIN", params.pads_begin, 0);
makeJitConstForParam(jit, "PADS_END", params.pads_end, 0);
return jit;
}
KernelsData SpaceToBatchKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, float estimatedTime) const {
KernelData kd = KernelData::Default<space_to_batch_params>(params);
space_to_batch_params& newParams = *static_cast<space_to_batch_params*>(kd.params.get());
if (!Validate(params, options)) {
return {};
}
auto runInfo = SetDefault(newParams, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto cldnn_jit = GetJitConstants(newParams);
std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
kd.estimatedTime = estimatedTime;
return { kd };
}
} // namespace kernel_selector

View File

@@ -0,0 +1,63 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "common_kernel_base.h"
#include "kernel_selector_params.h"
#include <vector>
namespace kernel_selector {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// space_to_batch_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct space_to_batch_params : public base_params {
space_to_batch_params() : base_params(KernelType::SPACE_TO_BATCH) {}
DimTensor<uint32_t> block_shape;
DimTensor<uint32_t> pads_begin;
DimTensor<uint32_t> pads_end;
virtual ParamsKey GetParamsKey() const { return base_params::GetParamsKey(); }
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// space_to_batch_optional_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct space_to_batch_optional_params : optional_params {
space_to_batch_optional_params() : optional_params(KernelType::SPACE_TO_BATCH) {}
};
struct space_to_batch_fuse_params : fuse_params {
space_to_batch_fuse_params() : fuse_params(KernelType::SPACE_TO_BATCH) {}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// SpaceToBatchKernelBase
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class SpaceToBatchKernelBase : public common_kernel_base {
public:
using common_kernel_base::common_kernel_base;
virtual ~SpaceToBatchKernelBase() {}
struct DispatchData : public CommonDispatchData {};
protected:
virtual bool Validate(const Params&, const optional_params&) const;
virtual JitConstants GetJitConstants(const space_to_batch_params& params) const;
virtual CommonDispatchData SetDefault(const space_to_batch_params& params, const optional_params&) const;
KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimatedTime) const;
};
} // namespace kernel_selector

View File

@@ -0,0 +1,46 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "space_to_batch_kernel_ref.h"
#include "kernel_selector_utils.h"
#include <string>
#include <vector>
namespace kernel_selector {
ParamsKey SpaceToBatchKernelRef::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableInputLayout(DataLayout::bfzyx);
k.EnableInputLayout(DataLayout::bfwzyx);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::bfzyx);
k.EnableOutputLayout(DataLayout::bfwzyx);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBatching();
return k;
}
KernelsData SpaceToBatchKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
return GetCommonKernelsData(params, options, FORCE_PRIORITY_9);
}
} // namespace kernel_selector

View File

@@ -0,0 +1,30 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "space_to_batch_kernel_base.h"
namespace kernel_selector {
class SpaceToBatchKernelRef : public SpaceToBatchKernelBase {
public:
SpaceToBatchKernelRef() : SpaceToBatchKernelBase("space_to_batch_ref") {}
virtual ~SpaceToBatchKernelRef() {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
};
} // namespace kernel_selector

View File

@@ -0,0 +1,29 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "space_to_batch_kernel_selector.h"
#include "space_to_batch_kernel_ref.h"
namespace kernel_selector {
space_to_batch_kernel_selector::space_to_batch_kernel_selector() {
Attach<SpaceToBatchKernelRef>();
}
KernelsData space_to_batch_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
return GetNaiveBestKernel(params, options, KernelType::SPACE_TO_BATCH);
}
} // namespace kernel_selector

View File

@@ -0,0 +1,35 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "kernel_selector.h"
namespace kernel_selector {
class space_to_batch_kernel_selector : public kernel_selector_base {
public:
static space_to_batch_kernel_selector& Instance() {
static space_to_batch_kernel_selector instance_;
return instance_;
}
space_to_batch_kernel_selector();
virtual ~space_to_batch_kernel_selector() {}
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};
} // namespace kernel_selector

View File

@@ -0,0 +1,73 @@
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "include/include_all.cl"
KERNEL(space_to_batch_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output)
{
const uint batch = get_global_id(0);
const uint feature = get_global_id(1);
#ifdef OUTPUT_LAYOUT_BFYX
const uint w = 0;
const uint z = 0;
const uint y = (uint)get_global_id(2) / OUTPUT_SIZE_X;
const uint x = (uint)get_global_id(2) % OUTPUT_SIZE_X;
#elif OUTPUT_LAYOUT_BFZYX
const uint w = 0;
const uint yx = (uint)get_global_id(2) % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
const uint z = (uint)get_global_id(2) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
const uint y = yx / OUTPUT_SIZE_X;
const uint x = yx % OUTPUT_SIZE_X;
#elif OUTPUT_LAYOUT_BFWZYX
const uint zyx = (uint)get_global_id(2) % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z);
const uint w = (uint)get_global_id(2) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z);
const uint yx = zyx % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
const uint z = zyx / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
const uint y = yx / OUTPUT_SIZE_X;
const uint x = yx % OUTPUT_SIZE_X;
#endif
const uint input_batch = batch % INPUT0_BATCH_NUM;
const uint offset_batch = batch / INPUT0_BATCH_NUM;
const int input_feature = feature * BLOCK_SHAPE_FEATURE - PADS_BEGIN_FEATURE +
offset_batch / (BLOCK_SHAPE_W * BLOCK_SHAPE_Z * BLOCK_SHAPE_Y * BLOCK_SHAPE_X);
const uint offset_feature = offset_batch % (BLOCK_SHAPE_W * BLOCK_SHAPE_Z * BLOCK_SHAPE_Y * BLOCK_SHAPE_X);
const int input_w = w * BLOCK_SHAPE_W - PADS_BEGIN_W + offset_feature / (BLOCK_SHAPE_Z * BLOCK_SHAPE_Y * BLOCK_SHAPE_X);
const uint offset_w = offset_feature % (BLOCK_SHAPE_Z * BLOCK_SHAPE_Y * BLOCK_SHAPE_X);
const int input_z = z * BLOCK_SHAPE_Z - PADS_BEGIN_Z + offset_w / (BLOCK_SHAPE_Y * BLOCK_SHAPE_X);
const uint offset_z = offset_w % (BLOCK_SHAPE_Y * BLOCK_SHAPE_X);
const int input_y = y * BLOCK_SHAPE_Y - PADS_BEGIN_Y + offset_z / BLOCK_SHAPE_X;
const uint offset_y = offset_z % BLOCK_SHAPE_X;
const int input_x = x * BLOCK_SHAPE_X - PADS_BEGIN_X + offset_y;
const int input_index = GET_DATA_INDEX_6D(INPUT0, input_batch, input_feature, input_w, input_z, input_y, input_x);
const uint output_index = GET_DATA_INDEX_6D(OUTPUT, batch, feature, w, z, y, x);
const bool out_of_bounds = input_feature < 0 || input_feature >= INPUT0_FEATURE_NUM ||
input_w < 0 || input_w >= INPUT0_SIZE_W ||
input_z < 0 || input_z >= INPUT0_SIZE_Z ||
input_y < 0 || input_y >= INPUT0_SIZE_Y ||
input_x < 0 || input_x >= INPUT0_SIZE_X;
INPUT0_TYPE in = out_of_bounds ? INPUT0_VAL_ZERO : input[input_index];
output[output_index] = ACTIVATION(in, ACTIVATION_PARAMS);
}

View File

@@ -85,6 +85,7 @@ void register_implementations_gpu() {
REGISTER_GPU(shuffle_channels);
REGISTER_GPU(softmax);
REGISTER_GPU(softmax_loss_grad);
REGISTER_GPU(space_to_batch);
REGISTER_GPU(space_to_depth);
REGISTER_GPU(strided_slice);
REGISTER_GPU(tile);

View File

@@ -77,6 +77,7 @@
#include "api/shuffle_channels.hpp"
#include "api/softmax.hpp"
#include "api/softmax_loss_grad.hpp"
#include "api/space_to_batch.hpp"
#include "api/strided_slice.hpp"
#include "api/tile.hpp"
#include "api/resample.hpp"
@@ -163,6 +164,7 @@ REGISTER_GPU(select);
REGISTER_GPU(shuffle_channels);
REGISTER_GPU(softmax);
REGISTER_GPU(softmax_loss_grad);
REGISTER_GPU(space_to_batch);
REGISTER_GPU(space_to_depth);
REGISTER_GPU(strided_slice);
REGISTER_GPU(tile);

View File

@@ -0,0 +1,75 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "space_to_batch_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "kernel_selector_helper.h"
#include "space_to_batch/space_to_batch_kernel_selector.h"
#include "space_to_batch/space_to_batch_kernel_ref.h"
#include "error_handler.h"
#include "data_inst.h"
#include <vector>
using namespace cldnn;
namespace cldnn {
namespace gpu {
struct space_to_batch_gpu : typed_primitive_gpu_impl<space_to_batch> {
using parent = typed_primitive_gpu_impl<space_to_batch>;
using parent::parent;
public:
static primitive_impl* create(const space_to_batch_node& arg) {
auto space_to_batch_params = get_default_params<kernel_selector::space_to_batch_params>(arg);
auto space_to_batch_optional_params =
get_default_optional_params<kernel_selector::space_to_batch_optional_params>(arg.get_program());
auto primitive = arg.get_primitive();
space_to_batch_params.block_shape = convert_dim_vector(primitive->block_shape);
space_to_batch_params.pads_begin = convert_dim_vector(primitive->pads_begin);
space_to_batch_params.pads_end = convert_dim_vector(primitive->pads_end);
auto& kernel_selector = kernel_selector::space_to_batch_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(space_to_batch_params, space_to_batch_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto space_to_batch = new space_to_batch_gpu(arg, best_kernels[0]);
return space_to_batch;
}
};
namespace detail {
attach_space_to_batch_gpu::attach_space_to_batch_gpu() {
auto val_fw = space_to_batch_gpu::create;
implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
}
} // namespace detail
} // namespace gpu
} // namespace cldnn

View File

@@ -49,6 +49,7 @@
#include "gather_inst.h"
#include "reverse_sequence_inst.h"
#include "shuffle_channels_inst.h"
#include "space_to_batch_inst.h"
#include "strided_slice_inst.h"
#include "cum_sum_inst.h"
#include "embedding_bag_inst.h"
@@ -200,10 +201,10 @@ void prepare_primitive_fusing::fuse_activations(program_impl &p) {
!input.is_type<permute>() && !input.is_type<pooling>() && !input.is_type<reorder>() &&
!input.is_type<reshape>() && !input.is_type<roi_pooling>() && !input.is_type<scale>() &&
!input.is_type<softmax>() && !input.is_type<resample>() && !input.is_type<mvn>() &&
!input.is_type<depth_to_space>() && !input.is_type<batch_to_space>() && !input.is_type<gather>() &&
!input.is_type<shuffle_channels>() && !input.is_type<strided_slice>() && !input.is_type<cum_sum>() &&
!input.is_type<reverse_sequence>() && !input.is_type<embedding_bag>() && !input.is_type<fused_conv_eltwise>() &&
!input.is_type<activation>()))
!input.is_type<depth_to_space>() && !input.is_type<batch_to_space>() && !input.is_type<space_to_batch>() &&
!input.is_type<gather>() && !input.is_type<shuffle_channels>() && !input.is_type<strided_slice>() &&
!input.is_type<cum_sum>() && !input.is_type<reverse_sequence>() && !input.is_type<embedding_bag>() &&
!input.is_type<fused_conv_eltwise>() && !input.is_type<activation>()))
return;
if (input.is_type<eltwise>()) {

View File

@@ -0,0 +1,49 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "api/space_to_batch.hpp"
#include "primitive_inst.h"
#include <string>
namespace cldnn {
template <>
struct typed_program_node<space_to_batch> : public typed_program_node_base<space_to_batch> {
using parent = typed_program_node_base<space_to_batch>;
public:
using parent::parent;
program_node& input(size_t index = 0) const { return get_dependency(index); }
};
using space_to_batch_node = typed_program_node<space_to_batch>;
template <>
class typed_primitive_inst<space_to_batch> : public typed_primitive_inst_base<space_to_batch> {
using parent = typed_primitive_inst_base<space_to_batch>;
public:
static layout calc_output_layout(space_to_batch_node const& node);
static std::string to_string(space_to_batch_node const& node);
public:
typed_primitive_inst(network_impl& network, space_to_batch_node const& desc);
};
using space_to_batch_inst = typed_primitive_inst<space_to_batch>;
} // namespace cldnn

View File

@@ -0,0 +1,90 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "space_to_batch_inst.h"
#include "primitive_type_base.h"
#include "error_handler.h"
#include "json_object.h"
#include "data_inst.h"
#include <string>
#include <vector>
namespace cldnn {
primitive_type_id cldnn::space_to_batch::type_id() {
static primitive_type_base<space_to_batch> instance;
return &instance;
}
layout space_to_batch_inst::calc_output_layout(space_to_batch_node const& node) {
auto desc = node.get_primitive();
auto input_layout = node.input(0).get_output_layout();
auto input_format = input_layout.format;
const size_t dims_num = format::dimension(input_format);
const auto& block_shape = desc->block_shape;
const auto& pads_begin = desc->pads_begin;
const auto& pads_end = desc->pads_end;
if (block_shape.batch[0] != 1)
CLDNN_ERROR_MESSAGE(node.id(),
"block_shape[0] is expected to be 1. Actual block_shape[0] is " +
std::to_string(block_shape.batch[0]));
if (pads_begin.batch[0] != 0)
CLDNN_ERROR_MESSAGE(node.id(),
"pads_begin[0] is expected to be 0. Actual pads_begin[0] is " +
std::to_string(pads_begin.batch[0]));
if (pads_end.batch[0] != 0)
CLDNN_ERROR_MESSAGE(node.id(),
"pads_end[0] is expected to be 0. Actual pads_end[0] is " +
std::to_string(pads_end.batch[0]));
if ((input_layout.size.sizes(input_format)[1] + pads_begin.feature[0] + pads_end.feature[0]) % block_shape.feature[0] != 0)
CLDNN_ERROR_MESSAGE(node.id(),
"Input feature shape after padding must be divisible by block_shape");
for (size_t i = 2; i < dims_num; ++i)
if ((input_layout.size.sizes(input_format)[dims_num - i + 1] + pads_begin.sizes()[i] + pads_end.sizes()[i]) % block_shape.sizes()[i] != 0)
CLDNN_ERROR_MESSAGE(node.id(),
"Input spatial shapes after padding must be divisible by block_shape");
return layout{input_layout.data_type, input_format, desc->out_size};
}
std::string space_to_batch_inst::to_string(space_to_batch_node const& node) {
auto desc = node.get_primitive();
auto node_info = node.desc_to_json();
auto& input = node.input();
std::stringstream primitive_description;
json_composite space_to_batch_info;
space_to_batch_info.add("input id", input.id());
node_info->add("space_to_batch_info", space_to_batch_info);
node_info->dump(primitive_description);
return primitive_description.str();
}
space_to_batch_inst::typed_primitive_inst(network_impl& network, space_to_batch_node const& node)
: parent(network, node) {}
} // namespace cldnn

View File

@@ -0,0 +1,495 @@
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////////////////////////////////
#include <gtest/gtest.h>
#include <api/input_layout.hpp>
#include <api/memory.hpp>
#include <api/space_to_batch.hpp>
#include <api/topology.hpp>
#include <api/network.hpp>
#include <api/data.hpp>
#include <cstddef>
#include <tests/test_utils/test_utils.h>
using namespace cldnn;
using namespace ::tests;
TEST(space_to_batch_fp16_gpu, i1222_bs1222_pb0000_pe0000) {
// Input : 1x2x2x2
// Block shape : 1x2x2x2
// Pads begin : 0x0x0x0
// Pads end : 0x0x0x0
// Output : 8x1x1x1
// Input values in fp16
engine engine;
auto input = memory::allocate(engine, { data_types::f16, format::bfyx, {1,2,2,2} });
set_values(input, {
FLOAT16(0.0f), FLOAT16(1.0f),
FLOAT16(2.0f), FLOAT16(3.0f),
FLOAT16(4.0f), FLOAT16(5.0f),
FLOAT16(6.0f), FLOAT16(7.0f)
});
topology topology;
topology.add(input_layout("Input", input.get_layout()));
topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfyx, {1,2,2,2}, 1),
tensor(format::bfyx, {0,0,0,0}, 0),
tensor(format::bfyx, {0,0,0,0}, 0),
tensor(format::bfyx, {8,1,1,1}, 1)));
network network(engine, topology);
network.set_input_data("Input", input);
auto outputs = network.execute();
auto output = outputs.at("space_to_batch").get_memory();
auto output_ptr = output.pointer<uint16_t>();
std::vector<float> expected_results = {
0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f
};
ASSERT_EQ(output_ptr.size(), expected_results.size());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
}
}
TEST(space_to_batch_fp16_gpu, i1242_bs1221_pb0020_pe0000) {
// Input : 1x2x4x2
// Block shape : 1x2x2x1
// Pads begin : 0x0x2x0
// Pads end : 0x0x0x0
// Output : 4x1x3x2
// Input values in fp16
engine engine;
auto input = memory::allocate(engine, { data_types::f16, format::bfyx, {1,2,2,4} });
set_values(input, {
FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f),
FLOAT16(4.0f), FLOAT16(5.0f), FLOAT16(6.0f), FLOAT16(7.0f),
FLOAT16(8.0f), FLOAT16(9.0f), FLOAT16(10.0f), FLOAT16(11.0f),
FLOAT16(12.0f), FLOAT16(13.0f), FLOAT16(14.0f), FLOAT16(15.0f)
});
topology topology;
topology.add(input_layout("Input", input.get_layout()));
topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfyx, {1,2,2,1}, 1),
tensor(format::bfyx, {0,0,2,0}, 0),
tensor(format::bfyx, {0,0,0,0}, 0),
tensor(format::bfyx, {4,1,3,2}, 1)));
network network(engine, topology);
network.set_input_data("Input", input);
auto outputs = network.execute();
auto output = outputs.at("space_to_batch").get_memory();
auto output_ptr = output.pointer<uint16_t>();
std::vector<float> expected_results = {
0.f, 0.f, 0.f, 1.f, 4.f, 5.f,
0.f, 0.f, 2.f, 3.f, 6.f, 7.f,
0.f, 0.f, 8.f, 9.f, 12.f, 13.f,
0.f, 0.f, 10.f, 11.f, 14.f, 15.f
};
ASSERT_EQ(output_ptr.size(), expected_results.size());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
}
}
TEST(space_to_batch_fp16_gpu, i2132_bs1222_pb0010_pe0100) {
// Input : 2x1x3x2
// Block shape : 1x2x2x2
// Pads begin : 0x0x1x0
// Pads end : 0x1x0x0
// Output : 16x1x2x1
// Input values in fp16
engine engine;
auto input = memory::allocate(engine, { data_types::f16, format::bfyx, {2,1,2,3} });
set_values(input, {
FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f),
FLOAT16(4.0f), FLOAT16(5.0f), FLOAT16(6.0f), FLOAT16(7.0f),
FLOAT16(8.0f), FLOAT16(9.0f), FLOAT16(10.0f), FLOAT16(11.0f)
});
topology topology;
topology.add(input_layout("Input", input.get_layout()));
topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfyx, {1,2,2,2}, 1),
tensor(format::bfyx, {0,0,1,0}, 0),
tensor(format::bfyx, {0,1,0,0}, 0),
tensor(format::bfyx, {16,1,2,1}, 1)));
network network(engine, topology);
network.set_input_data("Input", input);
auto outputs = network.execute();
auto output = outputs.at("space_to_batch").get_memory();
auto output_ptr = output.pointer<uint16_t>();
std::vector<float> expected_results = {
0.f, 2.f, 0.f, 8.f, 0.f, 3.f, 0.f, 9.f,
0.f, 4.f, 6.f, 10.f, 1.f, 5.f, 7.f, 11.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f
};
ASSERT_EQ(output_ptr.size(), expected_results.size());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
}
}
TEST(space_to_batch_fp16_gpu, i12132_bs12122_pb00010_pe00000) {
// Input : 1x2x1x3x2
// Block shape : 1x2x1x2x2
// Pads begin : 0x0x0x1x0
// Pads end : 0x0x0x0x0
// Output : 8x1x1x2x1
// Input values in fp16
engine engine;
auto input = memory::allocate(engine, { data_types::f16, format::bfzyx, {1,2,2,3,1} });
set_values(input, {
FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f),
FLOAT16(4.0f), FLOAT16(5.0f), FLOAT16(6.0f), FLOAT16(7.0f),
FLOAT16(8.0f), FLOAT16(9.0f), FLOAT16(10.0f), FLOAT16(11.0f)
});
topology topology;
topology.add(input_layout("Input", input.get_layout()));
topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfzyx, {1,2,1,2,2}, 1),
tensor(format::bfzyx, {0,0,0,1,0}, 0),
tensor(format::bfzyx, {0,0,0,0,0}, 0),
tensor(format::bfzyx, {8,1,1,2,1}, 1)));
network network(engine, topology);
network.set_input_data("Input", input);
auto outputs = network.execute();
auto output = outputs.at("space_to_batch").get_memory();
auto output_ptr = output.pointer<uint16_t>();
std::vector<float> expected_results = {
0.f, 2.f, 0.f, 3.f, 0.f, 4.f, 1.f, 5.f,
0.f, 8.f, 0.f, 9.f, 6.f, 10.f, 7.f, 11.f
};
ASSERT_EQ(output_ptr.size(), expected_results.size());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
}
}
TEST(space_to_batch_fp16_gpu, i134121_bs142121_pb010100_pe000000) {
// Input : 1x3x4x1x2x1
// Block shape : 1x4x2x1x2x1
// Pads begin : 0x1x0x1x0x0
// Pads end : 0x0x0x0x0x0
// Output : 16x1x2x2x1x1
// Input values in fp16
engine engine;
tensor input_shape = tensor{ batch(1), feature(3), spatial(1, 2, 1, 4) };
auto input = memory::allocate(engine, { data_types::f16, format::bfwzyx, input_shape });
set_values(input, {
FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f),
FLOAT16(4.0f), FLOAT16(5.0f), FLOAT16(6.0f), FLOAT16(7.0f),
FLOAT16(8.0f), FLOAT16(9.0f), FLOAT16(10.0f), FLOAT16(11.0f),
FLOAT16(12.0f), FLOAT16(13.0f), FLOAT16(14.0f), FLOAT16(15.0f),
FLOAT16(16.0f), FLOAT16(17.0f), FLOAT16(18.0f), FLOAT16(19.0f),
FLOAT16(20.0f), FLOAT16(21.0f), FLOAT16(22.0f), FLOAT16(23.0f)
});
topology topology;
topology.add(input_layout("Input", input.get_layout()));
topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfwzyx, {1,4,2,1,2,1}, 1),
tensor(format::bfwzyx, {0,1,0,1,0,0}, 0),
tensor(format::bfwzyx, {0,0,0,0,0,0}, 0),
tensor(format::bfwzyx, {16,1,2,2,1,1}, 1)));
network network(engine, topology);
network.set_input_data("Input", input);
auto outputs = network.execute();
auto output = outputs.at("space_to_batch").get_memory();
auto output_ptr = output.pointer<uint16_t>();
std::vector<float> expected_results = {
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 4.f, 0.f, 1.f, 0.f, 5.f,
0.f, 2.f, 0.f, 6.f, 0.f, 3.f, 0.f, 7.f,
0.f, 8.f, 0.f, 12.f, 0.f, 9.f, 0.f, 13.f,
0.f, 10.f, 0.f, 14.f, 0.f, 11.f, 0.f, 15.f,
0.f, 16.f, 0.f, 20.f, 0.f, 17.f, 0.f, 21.f,
0.f, 18.f, 0.f, 22.f, 0.f, 19.f, 0.f, 23.f
};
ASSERT_EQ(output_ptr.size(), expected_results.size());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
}
}
TEST(space_to_batch_fp32_gpu, i1222_bs1222_pb0000_pe0000) {
// Input : 1x2x2x2
// Block shape : 1x2x2x2
// Pads begin : 0x0x0x0
// Pads end : 0x0x0x0
// Output : 8x1x1x1
// Input values in fp32
engine engine;
auto input = memory::allocate(engine, { data_types::f32, format::bfyx, {1,2,2,2} });
set_values(input, {
0.0f, 1.0f, 2.0f, 3.0f,
4.0f, 5.0f, 6.0f, 7.0f
});
topology topology;
topology.add(input_layout("Input", input.get_layout()));
topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfyx, {1,2,2,2}, 1),
tensor(format::bfyx, {0,0,0,0}, 0),
tensor(format::bfyx, {0,0,0,0}, 0),
tensor(format::bfyx, {8,1,1,1}, 1)));
network network(engine, topology);
network.set_input_data("Input", input);
auto outputs = network.execute();
auto output = outputs.at("space_to_batch").get_memory();
auto output_ptr = output.pointer<float>();
std::vector<float> expected_results = {
0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f
};
ASSERT_EQ(output_ptr.size(), expected_results.size());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], output_ptr[i]);
}
}
TEST(space_to_batch_fp32_gpu, i1242_bs1221_pb0020_pe0000) {
// Input : 1x2x4x2
// Block shape : 1x2x2x1
// Pads begin : 0x0x2x0
// Pads end : 0x0x0x0
// Output : 4x1x3x2
// Input values in fp32
engine engine;
auto input = memory::allocate(engine, { data_types::f32, format::bfyx, {1,2,2,4} });
set_values(input, {
0.0f, 1.0f, 2.0f, 3.0f,
4.0f, 5.0f, 6.0f, 7.0f,
8.0f, 9.0f, 10.0f, 11.0f,
12.0f, 13.0f, 14.0f, 15.0f
});
topology topology;
topology.add(input_layout("Input", input.get_layout()));
topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfyx, {1,2,2,1}, 1),
tensor(format::bfyx, {0,0,2,0}, 0),
tensor(format::bfyx, {0,0,0,0}, 0),
tensor(format::bfyx, {4,1,3,2}, 1)));
network network(engine, topology);
network.set_input_data("Input", input);
auto outputs = network.execute();
auto output = outputs.at("space_to_batch").get_memory();
auto output_ptr = output.pointer<float>();
std::vector<float> expected_results = {
0.f, 0.f, 0.f, 1.f, 4.f, 5.f,
0.f, 0.f, 2.f, 3.f, 6.f, 7.f,
0.f, 0.f, 8.f, 9.f, 12.f, 13.f,
0.f, 0.f, 10.f, 11.f, 14.f, 15.f
};
ASSERT_EQ(output_ptr.size(), expected_results.size());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], output_ptr[i]);
}
}
TEST(space_to_batch_fp32_gpu, i2132_bs1222_pb0010_pe0100) {
// Input : 2x1x3x2
// Block shape : 1x2x2x2
// Pads begin : 0x0x1x0
// Pads end : 0x1x0x0
// Output : 16x1x2x1
// Input values in fp32
engine engine;
auto input = memory::allocate(engine, { data_types::f32, format::bfyx, {2,1,2,3} });
set_values(input, {
0.0f, 1.0f, 2.0f, 3.0f,
4.0f, 5.0f, 6.0f, 7.0f,
8.0f, 9.0f, 10.0f, 11.0f
});
topology topology;
topology.add(input_layout("Input", input.get_layout()));
topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfyx, {1,2,2,2}, 1),
tensor(format::bfyx, {0,0,1,0}, 0),
tensor(format::bfyx, {0,1,0,0}, 0),
tensor(format::bfyx, {16,1,2,1}, 1)));
network network(engine, topology);
network.set_input_data("Input", input);
auto outputs = network.execute();
auto output = outputs.at("space_to_batch").get_memory();
auto output_ptr = output.pointer<float>();
std::vector<float> expected_results = {
0.f, 2.f, 0.f, 8.f, 0.f, 3.f, 0.f, 9.f,
0.f, 4.f, 6.f, 10.f, 1.f, 5.f, 7.f, 11.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f
};
ASSERT_EQ(output_ptr.size(), expected_results.size());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], output_ptr[i]);
}
}
TEST(space_to_batch_fp32_gpu, i12132_bs12122_pb00010_pe00000) {
// Input : 1x2x1x3x2
// Block shape : 1x2x1x2x2
// Pads begin : 0x0x0x1x0
// Pads end : 0x0x0x0x0
// Output : 8x1x1x2x1
// Input values in fp32
engine engine;
auto input = memory::allocate(engine, { data_types::f32, format::bfzyx, {1,2,2,3,1} });
set_values(input, {
0.0f, 1.0f, 2.0f, 3.0f,
4.0f, 5.0f, 6.0f, 7.0f,
8.0f, 9.0f, 10.0f, 11.0f
});
topology topology;
topology.add(input_layout("Input", input.get_layout()));
topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfzyx, {1,2,1,2,2}, 1),
tensor(format::bfzyx, {0,0,0,1,0}, 0),
tensor(format::bfzyx, {0,0,0,0,0}, 0),
tensor(format::bfzyx, {8,1,1,2,1}, 1)));
network network(engine, topology);
network.set_input_data("Input", input);
auto outputs = network.execute();
auto output = outputs.at("space_to_batch").get_memory();
auto output_ptr = output.pointer<float>();
std::vector<float> expected_results = {
0.f, 2.f, 0.f, 3.f, 0.f, 4.f, 1.f, 5.f,
0.f, 8.f, 0.f, 9.f, 6.f, 10.f, 7.f, 11.f
};
ASSERT_EQ(output_ptr.size(), expected_results.size());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], output_ptr[i]);
}
}
TEST(space_to_batch_fp32_gpu, i134121_bs142121_pb010100_pe000000) {
// Input : 1x3x4x1x2x1
// Block shape : 1x4x2x1x2x1
// Pads begin : 0x1x0x1x0x0
// Pads end : 0x0x0x0x0x0
// Output : 16x1x2x2x1x1
// Input values in fp32
engine engine;
tensor input_shape = tensor{ batch(1), feature(3), spatial(1, 2, 1, 4) };
auto input = memory::allocate(engine, { data_types::f32, format::bfwzyx, input_shape });
set_values(input, {
0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f,
12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f,
18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f
});
topology topology;
topology.add(input_layout("Input", input.get_layout()));
topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfwzyx, {1,4,2,1,2,1}, 1),
tensor(format::bfwzyx, {0,1,0,1,0,0}, 0),
tensor(format::bfwzyx, {0,0,0,0,0,0}, 0),
tensor(format::bfwzyx, {16,1,2,2,1,1}, 1)));
network network(engine, topology);
network.set_input_data("Input", input);
auto outputs = network.execute();
auto output = outputs.at("space_to_batch").get_memory();
auto output_ptr = output.pointer<float>();
std::vector<float> expected_results = {
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 4.f, 0.f, 1.f, 0.f, 5.f,
0.f, 2.f, 0.f, 6.f, 0.f, 3.f, 0.f, 7.f,
0.f, 8.f, 0.f, 12.f, 0.f, 9.f, 0.f, 13.f,
0.f, 10.f, 0.f, 14.f, 0.f, 11.f, 0.f, 15.f,
0.f, 16.f, 0.f, 20.f, 0.f, 17.f, 0.f, 21.f,
0.f, 18.f, 0.f, 22.f, 0.f, 19.f, 0.f, 23.f
};
ASSERT_EQ(output_ptr.size(), expected_results.size());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], output_ptr[i]);
}
}