From c7661078d9519a58ed83bc60513975c23e1ada5c Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Wed, 28 Oct 2020 09:24:22 +0300 Subject: [PATCH] [IE CLDNN] Removed unused DO gpu impl (#2809) --- .../thirdparty/clDNN/api/detection_output.hpp | 48 +-- .../thirdparty/clDNN/api/program.hpp | 17 +- .../kernel_selector/common/common_types.h | 1 - .../detection_output_kernel_base.cpp | 64 ---- .../detection_output_kernel_base.h | 78 ---- .../detection_output_kernel_ref.cpp | 88 ----- .../detection_output_kernel_ref.h | 33 -- .../detection_output_kernel_selector.cpp | 34 -- .../detection_output_kernel_selector.h | 48 --- .../detection_output_kernel_sort.cpp | 82 ----- .../detection_output_kernel_sort.h | 33 -- .../core/cl_kernels/detection_output.cl | 217 ----------- .../core/cl_kernels/detection_output_sort.cl | 217 ----------- .../thirdparty/clDNN/src/detection_output.cpp | 92 +---- .../clDNN/src/gpu/detection_output_cpu.cpp | 12 +- .../clDNN/src/gpu/detection_output_gpu.cpp | 164 --------- .../graph_optimizer/graph_initializations.cpp | 51 --- .../clDNN/src/include/detection_output_inst.h | 36 +- .../clDNN/src/include/pass_manager.h | 3 +- .../test_cases/detection_output_test.cpp | 344 +++--------------- 20 files changed, 77 insertions(+), 1585 deletions(-) delete mode 100644 inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_base.cpp delete mode 100644 inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_base.h delete mode 100644 inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.cpp delete mode 100644 inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.h delete mode 100644 inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_selector.cpp delete mode 100644 inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_selector.h delete mode 100644 inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_sort.cpp delete mode 100644 inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_sort.h delete mode 100644 inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/detection_output.cl delete mode 100644 inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/detection_output_sort.cl delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/detection_output_gpu.cpp diff --git a/inference-engine/thirdparty/clDNN/api/detection_output.hpp b/inference-engine/thirdparty/clDNN/api/detection_output.hpp index 6df38c1cee7..577f753fa9d 100644 --- a/inference-engine/thirdparty/clDNN/api/detection_output.hpp +++ b/inference-engine/thirdparty/clDNN/api/detection_output.hpp @@ -1,5 +1,5 @@ /* -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -142,52 +142,6 @@ struct detection_output : public primitive_base { protected: }; -/// @brief Generates a list of detections based on location and confidence predictions by doing non maximum suppression. -/// @details Each row is a 7 dimension vector, which stores: [image_id, label, confidence, xmin, ymin, xmax, ymax]. -/// If number of detections per image is lower than keep_top_k, will write dummy results at the end with image_id=-1. -struct detection_output_sort - : public primitive_base { - CLDNN_DECLARE_PRIMITIVE(detection_output_sort) - - /// @brief Constructs detection output primitive. - /// @param id This primitive id. - /// @param input_bboxes Input bounding boxes primitive id. - /// @param num_images Number of images to be predicted. - /// @param num_classes Number of classes to be predicted. - /// @param keep_top_k Number of total bounding boxes to be kept per image after NMS step. - /// @param share_location If true bounding box are shared among different classes. - /// @param top_k Maximum number of results to be kept in NMS. - /// @param output_padding Output padding. - detection_output_sort(const primitive_id& id, - const primitive_id& input_bboxes, - const uint32_t num_images, - const uint32_t num_classes, - const uint32_t keep_top_k, - const bool share_location = true, - const int top_k = -1, - const int background_label_id = -1, - const padding& output_padding = padding()) - : primitive_base(id, {input_bboxes}, output_padding), - num_images(num_images), - num_classes(num_classes), - keep_top_k(keep_top_k), - share_location(share_location), - top_k(top_k), - background_label_id(background_label_id) {} - - /// @brief Number of classes to be predicted. - const uint32_t num_images; - /// @brief Number of classes to be predicted. - const uint32_t num_classes; - /// @brief Number of total bounding boxes to be kept per image after NMS step. - const int keep_top_k; - /// @brief If true, bounding box are shared among different classes. - const bool share_location; - /// @brief Maximum number of results to be kept in NMS. - const int top_k; - /// @brief Background label id (-1 if there is no background class). - const int background_label_id; -}; /// @} /// @} /// @} diff --git a/inference-engine/thirdparty/clDNN/api/program.hpp b/inference-engine/thirdparty/clDNN/api/program.hpp index 5f6a4f8a7d1..087aa475459 100644 --- a/inference-engine/thirdparty/clDNN/api/program.hpp +++ b/inference-engine/thirdparty/clDNN/api/program.hpp @@ -1,5 +1,5 @@ /* -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -46,9 +46,6 @@ enum class build_option_type { /// @brief Enable implicit static input reordering for user inputs (default: false). allow_static_input_reorder, - /// @brief Enable running detection output layer always on gpu, regardless performance - detection_output_gpu, - /// @brief Enable debug mode (default: false). /// @details This option enforce all program primitives to be accessible as outputs. debug, @@ -130,9 +127,6 @@ struct build_option { /// @brief Enable implicit reordering for static user inputs (default: false). static std::shared_ptr allow_static_input_reorder(bool enable = false); - /// @brief Enable running detection output layer always on GPU, regardless performance (default: false). - static std::shared_ptr detection_output_gpu(bool enable = false); - /// @brief Enable debug mode (default: false). /// @details This option enforce all program primitives to be accessible as outputs. static std::shared_ptr debug(bool enable = false); @@ -323,11 +317,6 @@ struct build_option_traits { static std::shared_ptr make_default() { return build_option::allow_static_input_reorder(); } }; template <> -struct build_option_traits { - typedef build_option_bool object_type; - static std::shared_ptr make_default() { return build_option::detection_output_gpu(); } -}; -template <> struct build_option_traits { typedef build_option_bool object_type; static std::shared_ptr make_default() { return build_option::debug(); } @@ -384,10 +373,6 @@ inline std::shared_ptr build_option::allow_static_input_reor return std::make_shared>(enable); } -inline std::shared_ptr build_option::detection_output_gpu(bool enable) { - return std::make_shared>(enable); -} - inline std::shared_ptr build_option::debug(bool enable) { return std::make_shared>(enable); } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h b/inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h index 3978673e8e1..5eda7d2c99a 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h @@ -56,7 +56,6 @@ enum class KernelType { PYRAMID_ROI_ALIGN, CONTRACT, ONE_HOT, - DETECTION_OUTPUT, GATHER, SCATTER_UPDATE, DEPTH_TO_SPACE, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_base.cpp deleted file mode 100644 index 418b124a0f7..00000000000 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_base.cpp +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2018-2020 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "detection_output_kernel_base.h" - -namespace kernel_selector { -JitConstants DetectionOutputKernelBase::GetJitConstants(const detection_output_params& params) const { - JitConstants jit = MakeBaseParamsJitConstants(params); - - const auto& detectOutParams = params.detectOutParams; - - jit.AddConstants({ - MakeJitConstant("NUM_IMAGES", detectOutParams.num_images), - MakeJitConstant("NUM_CLASSES", detectOutParams.num_classes), - MakeJitConstant("KEEP_TOP_K", detectOutParams.keep_top_k), - MakeJitConstant("TOP_K", detectOutParams.top_k), - MakeJitConstant("BACKGROUND_LABEL_ID", detectOutParams.background_label_id), - MakeJitConstant("CODE_TYPE", detectOutParams.code_type), - MakeJitConstant("CONF_SIZE_X", detectOutParams.conf_size_x), - MakeJitConstant("CONF_SIZE_Y", detectOutParams.conf_size_y), - MakeJitConstant("CONF_PADDING_X", detectOutParams.conf_padding_x), - MakeJitConstant("CONF_PADDING_Y", detectOutParams.conf_padding_y), - MakeJitConstant("SHARE_LOCATION", detectOutParams.share_location), - MakeJitConstant("VARIANCE_ENCODED_IN_TARGET", detectOutParams.variance_encoded_in_target), - MakeJitConstant("NMS_THRESHOLD", detectOutParams.nms_threshold), - MakeJitConstant("ETA", detectOutParams.eta), - MakeJitConstant("CONFIDENCE_THRESHOLD", detectOutParams.confidence_threshold), - MakeJitConstant("IMAGE_WIDTH", detectOutParams.input_width), - MakeJitConstant("IMAGE_HEIGH", detectOutParams.input_heigh), - MakeJitConstant("ELEMENTS_PER_THREAD", detectOutParams.elements_per_thread), - MakeJitConstant("PRIOR_COORD_OFFSET", detectOutParams.prior_coordinates_offset), - MakeJitConstant("PRIOR_INFO_SIZE", detectOutParams.prior_info_size), - MakeJitConstant("PRIOR_IS_NORMALIZED", detectOutParams.prior_is_normalized), - }); - - return jit; -} - -DetectionOutputKernelBase::DispatchData DetectionOutputKernelBase::SetDefault(const detection_output_params& /*params*/) const { - DispatchData dispatchData; - - dispatchData.gws[0] = 0; - dispatchData.gws[1] = 0; - dispatchData.gws[2] = 0; - - dispatchData.lws[0] = 0; - dispatchData.lws[1] = 0; - dispatchData.lws[2] = 0; - - return dispatchData; -} -} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_base.h deleted file mode 100644 index 607947b1399..00000000000 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_base.h +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright (c) 2018-2020 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#pragma once - -#include "kernel_base_opencl.h" -#include "kernel_selector_params.h" - -namespace kernel_selector { -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// detection_output_params -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -struct detection_output_params : public base_params { - detection_output_params() : base_params(KernelType::DETECTION_OUTPUT), detectOutParams() {} - - struct DedicatedParams { - uint32_t num_images; - uint32_t num_classes; - int32_t keep_top_k; - int32_t top_k; - int32_t background_label_id; - int32_t code_type; - int32_t conf_size_x; - int32_t conf_size_y; - int32_t conf_padding_x; - int32_t conf_padding_y; - int32_t elements_per_thread; - int32_t input_width; - int32_t input_heigh; - int32_t prior_coordinates_offset; - int32_t prior_info_size; - bool prior_is_normalized; - bool share_location; - bool variance_encoded_in_target; - float nms_threshold; - float eta; - float confidence_threshold; - }; - - DedicatedParams detectOutParams; - - virtual ParamsKey GetParamsKey() const { return base_params::GetParamsKey(); } -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// detection_output_optional_params -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -struct detection_output_optional_params : optional_params { - detection_output_optional_params() : optional_params(KernelType::DETECTION_OUTPUT) {} -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// DetectionOutputKernelBase -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -class DetectionOutputKernelBase : public KernelBaseOpenCL { -public: - using KernelBaseOpenCL ::KernelBaseOpenCL; - virtual ~DetectionOutputKernelBase() {} - - using DispatchData = CommonDispatchData; - -protected: - JitConstants GetJitConstants(const detection_output_params& params) const; - virtual DispatchData SetDefault(const detection_output_params& params) const; -}; -} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.cpp deleted file mode 100644 index a68d4580c12..00000000000 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.cpp +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright (c) 2018-2020 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "detection_output_kernel_ref.h" -#include "kernel_selector_utils.h" - -#define PRIOR_BOX_SIZE 4 // Each prior-box consists of [xmin, ymin, xmax, ymax]. - -namespace kernel_selector { - -ParamsKey DetectionOutputKernel::GetSupportedKey() const { - ParamsKey k; - k.EnableInputDataType(Datatype::F16); - k.EnableInputDataType(Datatype::F32); - k.EnableOutputDataType(Datatype::F16); - k.EnableOutputDataType(Datatype::F32); - k.EnableInputLayout(DataLayout::bfyx); - k.EnableOutputLayout(DataLayout::bfyx); - k.EnableTensorOffset(); - k.EnableTensorPitches(); - k.EnableBatching(); - return k; -} - -CommonDispatchData DetectionOutputKernel::SetDefault(const detection_output_params& params) const { - CommonDispatchData dispatchData = DetectionOutputKernelBase::SetDefault(params); - - // Number of all work items is set to total number of bounding boxes - - // one bounding box is procerssed by one work item - size_t num_classes = (params.detectOutParams.share_location) ? 1 : params.detectOutParams.num_classes; - - // Size of input0 (input location), if shared loaction it is equal to size of one class, - // else it has size of all items for all classes - size_t bboxesNum = params.inputs[0].LogicalSize() / PRIOR_BOX_SIZE / num_classes; - // Work group size is set to number of bounding boxes per image for sorting purpose - // (access to one table with sorted values) - size_t work_group_size = bboxesNum / params.inputs[0].Batch().v; - - if (work_group_size > 256) { - work_group_size = work_group_size / ((work_group_size / 256) + 1) + 1; - } - - bboxesNum = work_group_size * params.inputs[0].Batch().v; - - dispatchData.gws[0] = Align(bboxesNum, work_group_size); - dispatchData.gws[1] = 1; - dispatchData.gws[2] = 1; - - dispatchData.lws[0] = work_group_size; - dispatchData.lws[1] = 1; - dispatchData.lws[2] = 1; - - return dispatchData; -} - -KernelsData DetectionOutputKernel::GetKernelsData(const Params& params, const optional_params& options) const { - assert(params.GetType() == KernelType::DETECTION_OUTPUT && options.GetType() == KernelType::DETECTION_OUTPUT); - - KernelData kd = KernelData::Default(params); - const detection_output_params& detectOutParams = static_cast(params); - DispatchData dispatchData = SetDefault(detectOutParams); - - auto cldnnJit = GetJitConstants(detectOutParams); - auto entryPoint = GetEntryPoint(kernelName, detectOutParams.layerID, options); - auto jit = CreateJit(kernelName, cldnnJit, entryPoint); - - auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entryPoint); - kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1}); - kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 2}); - - kd.estimatedTime = FORCE_PRIORITY_8; - - return {kd}; -} -} // namespace kernel_selector \ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.h deleted file mode 100644 index cafc7f323e0..00000000000 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.h +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) 2018 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#pragma once - -#include "detection_output_kernel_base.h" - -namespace kernel_selector { - -class DetectionOutputKernel : public DetectionOutputKernelBase { -public: - DetectionOutputKernel() : DetectionOutputKernelBase("detection_output") {} - virtual ~DetectionOutputKernel() {} - - KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; - ParamsKey GetSupportedKey() const override; - -private: - CommonDispatchData SetDefault(const detection_output_params& params) const override; -}; -} // namespace kernel_selector \ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_selector.cpp deleted file mode 100644 index 534bffd1bdc..00000000000 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_selector.cpp +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2018 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "detection_output_kernel_selector.h" -#include "detection_output_kernel_ref.h" -#include "detection_output_kernel_sort.h" - -namespace kernel_selector { -detection_output_kernel_selector::detection_output_kernel_selector() { Attach(); } - -KernelsData detection_output_kernel_selector::GetBestKernels(const Params& params, - const optional_params& options) const { - return GetNaiveBestKernel(params, options, KernelType::DETECTION_OUTPUT); -} - -detection_output_sort_kernel_selector::detection_output_sort_kernel_selector() { Attach(); } - -KernelsData detection_output_sort_kernel_selector::GetBestKernels(const Params& params, - const optional_params& options) const { - return GetNaiveBestKernel(params, options, KernelType::DETECTION_OUTPUT); -} -} // namespace kernel_selector \ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_selector.h deleted file mode 100644 index ba670785767..00000000000 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_selector.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (c) 2018 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#pragma once - -#include "kernel_selector.h" - -namespace kernel_selector { -class detection_output_kernel_selector : public kernel_selector_base { -public: - static detection_output_kernel_selector& Instance() { - static detection_output_kernel_selector instance_; - return instance_; - } - - detection_output_kernel_selector(); - - virtual ~detection_output_kernel_selector() {} - - KernelsData GetBestKernels(const Params& params, const optional_params& options) const override; -}; - -class detection_output_sort_kernel_selector : public kernel_selector_base { -public: - static detection_output_sort_kernel_selector& Instance() { - static detection_output_sort_kernel_selector instance_; - return instance_; - } - - detection_output_sort_kernel_selector(); - - virtual ~detection_output_sort_kernel_selector() {} - - KernelsData GetBestKernels(const Params& params, const optional_params& options) const override; -}; -} // namespace kernel_selector \ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_sort.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_sort.cpp deleted file mode 100644 index 03de4a71972..00000000000 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_sort.cpp +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (c) 2018-2020 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "detection_output_kernel_sort.h" -#include "kernel_selector_utils.h" - -#define DETECTION_OUTPUT_ROW_SIZE 7 // Each detection consists of [image_id, label, confidence, xmin, ymin, xmax, ymax]. - -namespace kernel_selector { - -ParamsKey DetectionOutputKernel_sort::GetSupportedKey() const { - ParamsKey k; - k.EnableInputDataType(Datatype::F16); - k.EnableInputDataType(Datatype::F32); - k.EnableOutputDataType(Datatype::F16); - k.EnableOutputDataType(Datatype::F32); - k.EnableInputLayout(DataLayout::bfyx); - k.EnableOutputLayout(DataLayout::bfyx); - k.EnableTensorOffset(); - k.EnableTensorPitches(); - k.EnableBatching(); - return k; -} - -CommonDispatchData DetectionOutputKernel_sort::SetDefault(const detection_output_params& params) const { - CommonDispatchData dispatchData = DetectionOutputKernelBase::SetDefault(params); - - unsigned class_num = params.detectOutParams.num_classes; - if (params.detectOutParams.share_location && params.detectOutParams.background_label_id == 0) { - class_num -= 1; - } - const size_t bboxesNum = class_num * params.detectOutParams.num_images; - // Work group size is set to number of bounding boxes per image - size_t work_group_size = class_num; - - if (work_group_size > 256) { - work_group_size = (work_group_size + work_group_size % 2) / (work_group_size / 256 + 1); - } - - dispatchData.gws[0] = Align(bboxesNum, work_group_size); - dispatchData.gws[1] = 1; - dispatchData.gws[2] = 1; - - dispatchData.lws[0] = work_group_size; - dispatchData.lws[1] = 1; - dispatchData.lws[2] = 1; - - return dispatchData; -} - -KernelsData DetectionOutputKernel_sort::GetKernelsData(const Params& params, const optional_params& options) const { - assert(params.GetType() == KernelType::DETECTION_OUTPUT && - options.GetType() == KernelType::DETECTION_OUTPUT); - - KernelData kd = KernelData::Default(params); - const detection_output_params& detectOutParams = static_cast(params); - DispatchData dispatchData = SetDefault(detectOutParams); - - auto cldnnJit = GetJitConstants(detectOutParams); - auto entryPoint = GetEntryPoint(kernelName, detectOutParams.layerID, options); - auto jit = CreateJit(kernelName, cldnnJit, entryPoint); - - auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entryPoint); - - kd.estimatedTime = FORCE_PRIORITY_8; - - return {kd}; -} -} // namespace kernel_selector \ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_sort.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_sort.h deleted file mode 100644 index ac9ea3f81e1..00000000000 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_sort.h +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) 2018 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#pragma once - -#include "detection_output_kernel_base.h" - -namespace kernel_selector { - -class DetectionOutputKernel_sort : public DetectionOutputKernelBase { -public: - DetectionOutputKernel_sort() : DetectionOutputKernelBase("detection_output_sort") {} - virtual ~DetectionOutputKernel_sort() {} - - KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; - ParamsKey GetSupportedKey() const override; - -private: - CommonDispatchData SetDefault(const detection_output_params& params) const override; -}; -} // namespace kernel_selector \ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/detection_output.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/detection_output.cl deleted file mode 100644 index 23c0604db2d..00000000000 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/detection_output.cl +++ /dev/null @@ -1,217 +0,0 @@ -// Copyright (c) 2018 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "include/include_all.cl" -#include "include/detection_output_common.cl" - -KERNEL (detection_output)(__global UNIT_TYPE* input_location, __global UNIT_TYPE* output, __global UNIT_TYPE* input_confidence, __global UNIT_TYPE* input_prior_box) -{ - const uint idx = get_global_id(0); // bbox idx - const uint local_id = (uint)get_local_id(0) * NUM_OF_ITEMS; // All bboxes from one image in work group - const uint idx_image = idx / NUM_OF_ITERATIONS; // idx of current image - - __local uint indexes[NUM_OF_PRIORS]; - __local uint scores_size[NUM_CLASSES * NUM_OF_IMAGES]; - __local bool stillSorting; - - uint indexes_class_0[NUM_OF_PRIORS]; - - int last_bbox_in_class = NUM_OF_ITEMS; - bool is_last_bbox_in_class = false; - for (uint it = 0; it < NUM_OF_ITEMS; it ++) - { - if (((local_id + it + 1) % NUM_OF_PRIORS) == 0 ) - { - last_bbox_in_class = it; - is_last_bbox_in_class = true; - break; - } - } - - for (uint idx_class = 0; idx_class < NUM_CLASSES; idx_class++) - { - if (idx_class == BACKGROUND_LABEL_ID) - { - continue; - } - - for (uint it = 0; it < NUM_OF_ITEMS; it++) - { - indexes[local_id + it] = local_id + it; - } - - stillSorting = true; - barrier(CLK_LOCAL_MEM_FENCE); - - bool is_last_bbox_in_image = (is_last_bbox_in_class) && (idx_class == (NUM_CLASSES - 1)); - - while(stillSorting) - { - barrier(CLK_LOCAL_MEM_FENCE); - stillSorting = false; - - for (uint i = 0; i < 2; i++) - { - for (uint it = 0; it < NUM_OF_ITEMS; it++) - { - uint item_id = local_id + it; - - uint idx1 = indexes[item_id]; - uint idx2 = indexes[item_id+1]; - bool perform = false; - if ((((i % 2) && (item_id % 2)) || - ((!(i % 2)) && (!(item_id % 2)))) && - (it < last_bbox_in_class)) - { - perform = true; - } - - if (perform && - (FUNC_CALL(get_score)(input_confidence, idx1, idx_class, idx_image) < - FUNC_CALL(get_score)(input_confidence, idx2, idx_class, idx_image))) - { - indexes[item_id] = idx2; - indexes[item_id+1] = idx1; - stillSorting = true; - } - barrier(CLK_LOCAL_MEM_FENCE); - } - } - } - - // Do it only once per class in image - if (is_last_bbox_in_class) - { - UNIT_TYPE adaptive_threshold = NMS_THRESHOLD; - uint post_nms_count = 0; - const uint shared_class = (SHARE_LOCATION)? 0 : idx_class; - scores_size[idx_class] = 0; - - // Do the "keep" algorithm only for classes with confidence greater than CONFIDENCE_THRESHOLD. - // Check first, the biggest one (after sort) element in class. - if (FUNC_CALL(get_score)(input_confidence, indexes[0], idx_class, idx_image) != 0.0f) - { - for (uint i = 0; i < SCORES_COUNT; i++) - { - const uint bb_idx = indexes[i]; - bool keep = true; - for (uint j = 0; j < post_nms_count; j++) - { - if (!keep) - { - break; - } - - UNIT_TYPE overlap = 0.0; - const uint bb_idx2 = indexes[j]; - - UNIT_TYPE decoded_bbox1[4]; - FUNC_CALL(get_decoded_bbox)(decoded_bbox1, input_location, input_prior_box, bb_idx, shared_class, idx_image); - UNIT_TYPE decoded_bbox2[4]; - FUNC_CALL(get_decoded_bbox)(decoded_bbox2, input_location, input_prior_box, bb_idx2, shared_class, idx_image); - bool intersecting = - (decoded_bbox1[0] < decoded_bbox2[2]) & - (decoded_bbox2[0] < decoded_bbox1[2]) & - (decoded_bbox1[1] < decoded_bbox2[3]) & - (decoded_bbox2[1] < decoded_bbox1[3]); - - if (intersecting) - { - const UNIT_TYPE intersect_width = min(decoded_bbox1[2], decoded_bbox2[2]) - max(decoded_bbox1[0], decoded_bbox2[0]); - const UNIT_TYPE intersect_height = min(decoded_bbox1[3], decoded_bbox2[3]) - max(decoded_bbox1[1], decoded_bbox2[1]); - const UNIT_TYPE intersect_size = intersect_width * intersect_height; - const UNIT_TYPE bbox1_area = (decoded_bbox1[2] - decoded_bbox1[0]) * (decoded_bbox1[3] - decoded_bbox1[1]); - const UNIT_TYPE bbox2_area = (decoded_bbox2[2] - decoded_bbox2[0]) * (decoded_bbox2[3] - decoded_bbox2[1]); - overlap = intersect_size / (bbox1_area + bbox2_area - intersect_size); - } - keep = (overlap <= adaptive_threshold); - } - if (keep) - { - indexes[post_nms_count] = indexes[i]; - ++post_nms_count; - } - if ((keep) && (ETA < 1) && (adaptive_threshold > 0.5)) - { - adaptive_threshold *= ETA; - } - } - } - // Write number of scores to global memory, for proper output order in separated work groups - scores_size[idx_class] = post_nms_count; - } - - stillSorting = true; - // Wait for scores number from all classes in images - barrier(CLK_LOCAL_MEM_FENCE); - - uint output_offset = (idx_image * NUM_CLASSES_OUT + idx_class - HIDDEN_CLASS) * SCORES_COUNT; - - for (uint it = 0; it < NUM_OF_ITEMS; it++) - { - const uint local_id_out = local_id + it; - - if (local_id_out < scores_size[idx_class]) - { - const uint score_idx = indexes[local_id_out]; - uint bb_idx = indexes[local_id_out]; - const uint shared_class = (SHARE_LOCATION)? 0 : idx_class; - UNIT_TYPE decoded_bbox[4]; - FUNC_CALL(get_decoded_bbox)(decoded_bbox, input_location, input_prior_box, bb_idx, shared_class, idx_image); - - const uint out_idx = (local_id_out + output_offset) * OUTPUT_ROW_SIZE + OUTPUT_OFFSET; - output[out_idx] = TO_UNIT_TYPE(idx_image); - output[out_idx + 1] = TO_UNIT_TYPE(idx_class); - output[out_idx + 2] = FUNC_CALL(get_score)(input_confidence, score_idx, idx_class, idx_image); - output[out_idx + 3] = decoded_bbox[0]; - output[out_idx + 4] = decoded_bbox[1]; - output[out_idx + 5] = decoded_bbox[2]; - output[out_idx + 6] = decoded_bbox[3]; - } - } - - // If work item is processing last bbox in image (we already know the number of all detections), - // use it to fill rest of keep_top_k items if number of detections is smaller - if (is_last_bbox_in_class) - { - uint out_idx = output_offset + scores_size[idx_class]; - - uint current_top_k = output_offset + SCORES_COUNT; - for (uint i = out_idx; i < current_top_k; i++) - { - out_idx = i * OUTPUT_ROW_SIZE + OUTPUT_OFFSET; - output[out_idx] = -1.0; - output[out_idx + 1] = 0.0; - output[out_idx + 2] = 0.0; - output[out_idx + 3] = 0.0; - output[out_idx + 4] = 0.0; - output[out_idx + 5] = 0.0; - output[out_idx + 6] = 0.0; - } - } - - // Write number of scores kept in first step of detection output - if (is_last_bbox_in_image) - { - uint scores_sum = 0; - for (uint i = 0; i < NUM_CLASSES; i++) - { - scores_sum += scores_size[i]; - } - output[idx_image] = scores_sum; - - } - } -} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/detection_output_sort.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/detection_output_sort.cl deleted file mode 100644 index 676047609df..00000000000 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/detection_output_sort.cl +++ /dev/null @@ -1,217 +0,0 @@ -// Copyright (c) 2018 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "include/include_all.cl" -#include "include/detection_output_common.cl" - -UNIT_TYPE FUNC(get_score_sort)(__global UNIT_TYPE* input_bboxes, const uint idx_bbox, const uint idx_image) -{ - if (idx_bbox == KEEP_BBOXES_NUM) - { - // Idx set to dummy value, return -1 to exclude this element from sorting - return -1; - } - else - { - return input_bboxes[(idx_bbox + idx_image * NUM_OF_IMAGE_BBOXES) * OUTPUT_ROW_SIZE + INPUT_OFFSET + SCORE_OFFSET]; - } -} - -KERNEL (detection_output_sort)(__global UNIT_TYPE* input_bboxes, __global UNIT_TYPE* output) -{ - __local uint indexes[NUM_CLASSES_IN]; - __local bool stillSorting; - __local uint output_count; - __local uint num_out_per_class[NUM_CLASSES_IN]; - - output_count = 0; - num_out_per_class[get_local_id(0)] = 0; - - const uint image_id = (uint)get_global_id(0) / NUM_CLASSES_IN; - const uint local_id = (uint)get_local_id(0) * NUM_OF_ITEMS_SORT; // All bboxes from one image in work group - - uint image_offset_input = image_id * NUM_OF_IMAGE_BBOXES; - - uint count_sum = 0; - for (uint i = 0; i < image_id; i++) - { - count_sum += (input_bboxes[i] < KEEP_TOP_K)? input_bboxes[i] : KEEP_TOP_K; - } - - uint image_offset_output = count_sum * OUTPUT_ROW_SIZE; - - // If there is less elements than needed, write input to output - if (input_bboxes[image_id] <= KEEP_TOP_K) - { - if (local_id == 0) - { - for (uint class = 0; class < NUM_CLASSES_IN; class++) - { - if (class == BACKGROUND_LABEL_ID && !HIDDEN_CLASS) - { - continue; - } - for (uint i = 0; i < NUM_OF_CLASS_BBOXES; i++) - { - uint input_idx = (i + image_offset_input + class * NUM_OF_CLASS_BBOXES) * OUTPUT_ROW_SIZE + INPUT_OFFSET; - if (input_bboxes[input_idx] != -1) - { - uint out_idx = output_count * OUTPUT_ROW_SIZE + image_offset_output; - - for (uint idx = 0; idx < OUTPUT_ROW_SIZE; idx++) - { - output[out_idx + idx] = input_bboxes[input_idx + idx]; - } - - output_count++; - } - else - { - break; - } - } - } - } - } - else - { - uint sorted_output[KEEP_TOP_K * NUM_CLASSES_IN]; - - for (uint it = 0; it < NUM_OF_ITEMS_SORT; it++) - { - indexes[local_id + it] = (local_id + it) * NUM_OF_CLASS_BBOXES; - } - - while (output_count < KEEP_BBOXES_NUM) - { - stillSorting = true; - - while(stillSorting) - { - barrier(CLK_LOCAL_MEM_FENCE); - stillSorting = false; - for (uint it = 0; it < NUM_OF_ITEMS_SORT; it++) - { - uint item_id = local_id + it; - for (uint i = 0; i < 2; i++) - { - - uint idx1 = indexes[item_id]; - uint idx2 = indexes[item_id+1]; - bool perform = false; - if ((((i % 2) && (item_id % 2)) || - ((!(i % 2)) && (!(item_id % 2)))) && - (item_id != (NUM_CLASSES_IN - 1))) - { - perform = true; - } - - if (perform && - (FUNC_CALL(get_score_sort)(input_bboxes, idx1, image_id) < - FUNC_CALL(get_score_sort)(input_bboxes, idx2, image_id))) - { - indexes[item_id] = idx2; - indexes[item_id+1] = idx1; - stillSorting = true; - } - barrier(CLK_LOCAL_MEM_FENCE); - } - } - } - - if (local_id == 0) - { - UNIT_TYPE top_score = FUNC_CALL(get_score_sort)(input_bboxes, indexes[0], image_id); - - if (top_score != 0) - { - for (uint it = 0; (it < NUM_CLASSES_IN) && (output_count < KEEP_BBOXES_NUM); it++) - { - if (FUNC_CALL(get_score_sort)(input_bboxes, indexes[it], image_id) == top_score) - { - // write to output, create counter, and check if keep_top_k is satisfied. - uint input_idx = (indexes[it] + image_offset_input) * OUTPUT_ROW_SIZE + INPUT_OFFSET; - uint class_idx = input_bboxes[input_idx + 1] - HIDDEN_CLASS; - - sorted_output[class_idx * KEEP_TOP_K + num_out_per_class[class_idx]] = input_idx; - num_out_per_class[class_idx]++; - - indexes[it]++; - output_count++; - - // If all class elements are written to output, set dummy value to exclude class from sorting. - if ((indexes[it] % NUM_OF_CLASS_BBOXES) == 0) - { - indexes[it] = KEEP_BBOXES_NUM; - } - } - } - } - else - { - // There is no more significant results to sort. - output_count = KEEP_BBOXES_NUM; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - } - if (local_id == 0) - { - output_count = 0; - for (uint i = 0; i < NUM_CLASSES_IN; i++) - { - for (uint j = 0; j < num_out_per_class[i]; j++) - { - - uint out_idx = output_count * OUTPUT_ROW_SIZE + image_offset_output; - for (uint idx = 0; idx < OUTPUT_ROW_SIZE; idx++) - { - output[out_idx + idx] = input_bboxes[sorted_output[i * KEEP_TOP_K + j] + idx]; - } - output_count++; - } - } - uint image_count_sum = (input_bboxes[image_id] < KEEP_TOP_K)? input_bboxes[image_id] : KEEP_TOP_K; - for (output_count; output_count < image_count_sum; output_count++) - { - uint out_idx = output_count * OUTPUT_ROW_SIZE + image_offset_output; - output[out_idx] = -1.0; - output[out_idx + 1] = 0.0; - output[out_idx + 2] = 0.0; - output[out_idx + 3] = 0.0; - output[out_idx + 4] = 0.0; - output[out_idx + 5] = 0.0; - output[out_idx + 6] = 0.0; - } - } - } - - if (local_id == 0 && - image_id == (NUM_IMAGES - 1)) - { - for (output_count += count_sum; output_count < (KEEP_TOP_K * NUM_IMAGES); output_count++ ) - { - uint out_idx = output_count * OUTPUT_ROW_SIZE; - output[out_idx] = -1.0; - output[out_idx + 1] = 0.0; - output[out_idx + 2] = 0.0; - output[out_idx + 3] = 0.0; - output[out_idx + 4] = 0.0; - output[out_idx + 5] = 0.0; - output[out_idx + 6] = 0.0; - } - } - -} diff --git a/inference-engine/thirdparty/clDNN/src/detection_output.cpp b/inference-engine/thirdparty/clDNN/src/detection_output.cpp index a14ee452c6d..01eb991c308 100644 --- a/inference-engine/thirdparty/clDNN/src/detection_output.cpp +++ b/inference-engine/thirdparty/clDNN/src/detection_output.cpp @@ -1,5 +1,5 @@ /* -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -65,16 +65,8 @@ layout detection_output_inst::calc_output_layout(detection_output_node const& no // Add space for number of output results per image - needed in the next detection output step output_size += ((input_layout.size.batch[0] + 15) / 16) * 16; - if (node.get_program().get_options().get()->enabled()) { - return {input_layout.data_type, cldnn::format::bfyx, cldnn::tensor(1, 1, 1, output_size)}; - } else { - return {input_layout.data_type, - cldnn::format::bfyx, - cldnn::tensor(1, - 1, - DETECTION_OUTPUT_ROW_SIZE, - node.get_primitive()->keep_top_k * input_layout.size.batch[0])}; - } + return {input_layout.data_type, cldnn::format::bfyx, + cldnn::tensor(1, 1, DETECTION_OUTPUT_ROW_SIZE, node.get_primitive()->keep_top_k * input_layout.size.batch[0])}; } std::string detection_output_inst::to_string(detection_output_node const& node) { @@ -203,82 +195,4 @@ detection_output_inst::typed_primitive_inst(network_impl& network, detection_out "Detection output layer doesn't support input padding in Prior-Box input"); } -/************************ Detection Output keep_top_k part ************************/ - -primitive_type_id detection_output_sort::type_id() { - static primitive_type_base instance; - return &instance; -} - -layout detection_output_sort_inst::calc_output_layout(detection_output_sort_node const& node) { - assert(static_cast(node.get_primitive()->output_data_type) == false && - "Output data type forcing is not supported for " - "detection_output_sort_node!"); - CLDNN_ERROR_NOT_EQUAL(node.id(), - "Detection output layer input number", - node.get_dependencies().size(), - "expected number of inputs", - static_cast(1), - ""); - - auto input_layout = node.input().get_output_layout(); - int keep_top_k = node.as().get_primitive()->keep_top_k; - int num_images = node.as().get_primitive()->num_images; - - // If detection output sort is used as a second part of detection output get proper info from detection otput node - if (num_images == 0) { - CLDNN_ERROR_BOOL(node.id(), - "node.get_dependency(0).is_type()", - !node.get_dependency(0).is_type(), - "Cannot calculate output layout."); - input_layout = node.get_dependency(0).as().location().get_output_layout(); - keep_top_k = node.get_dependency(0).as().get_primitive()->keep_top_k; - num_images = input_layout.size.batch[0]; - } - // Batch size and feature size are 1. - // Number of bounding boxes to be kept is set to keep_top_k*batch size. - // If number of detections is lower than keep_top_k, will write dummy results at the end with image_id=-1. - // Each row is a 7 dimension vector, which stores: - // [image_id, label, confidence, xmin, ymin, xmax, ymax] - return {input_layout.data_type, - cldnn::format::bfyx, - cldnn::tensor(1, 1, DETECTION_OUTPUT_ROW_SIZE, keep_top_k * num_images)}; -} - -std::string detection_output_sort_inst::to_string(detection_output_sort_node const& node) { - auto node_info = node.desc_to_json(); - auto desc = node.get_primitive(); - - auto& input_bboxes = node.input(); - - std::stringstream primitive_description; - - json_composite detec_out_info; - detec_out_info.add("input bboxes id", input_bboxes.id()); - detec_out_info.add("num_classes:", desc->num_images); - detec_out_info.add("num_classes:", desc->num_classes); - detec_out_info.add("keep_top_k", desc->keep_top_k); - detec_out_info.add("share_location", desc->share_location); - detec_out_info.add("top_k", desc->top_k); - detec_out_info.dump(primitive_description); - - node_info->add("dection output info", detec_out_info); - node_info->dump(primitive_description); - - return primitive_description.str(); -} - -detection_output_sort_inst::typed_primitive_inst(network_impl& network, detection_output_sort_node const& node) - : parent(network, node) { - CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), - "Input memory format", - node.get_dependency(0).get_output_layout().format.value, - "expected bfyx input format", - format::bfyx); - - CLDNN_ERROR_BOOL(node.id(), - "Detecion output layer padding", - node.is_padded(), - "Detection output layer doesn't support output padding."); -} } // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/detection_output_cpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/detection_output_cpu.cpp index d0b78923394..baae08b130a 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/detection_output_cpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/detection_output_cpu.cpp @@ -1,5 +1,5 @@ /* -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include "network_impl.h" #include "implementation_map.h" #include "math_utils.h" +#include "register_gpu.hpp" #include "cpu_impl_helpers.hpp" #include @@ -636,7 +637,14 @@ struct detection_output_cpu : typed_primitive_impl { static primitive_impl* create(const detection_output_node& arg) { return new detection_output_cpu(arg); } }; -primitive_impl* runDetectOutCpu(const detection_output_node& arg) { return new detection_output_cpu(arg); } +namespace detail { + +attach_detection_output_gpu::attach_detection_output_gpu() { + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), detection_output_cpu::create); + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), detection_output_cpu::create); +} + +} // namespace detail } // namespace gpu } // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/detection_output_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/detection_output_gpu.cpp deleted file mode 100644 index b799e1b0cb6..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/detection_output_gpu.cpp +++ /dev/null @@ -1,164 +0,0 @@ -/* -// Copyright (c) 2016 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -*/ - -#include "detection_output_inst.h" -#include "primitive_gpu_base.h" -#include "error_handler.h" -#include "kernel_selector_helper.h" -#include "detection_output/detection_output_kernel_base.h" -#include "detection_output/detection_output_kernel_selector.h" - -#ifdef FIX_OPENMP_RELEASE_ISSUE -#ifdef OPENMP_FOUND -#include -#endif -#endif - -namespace cldnn { -namespace gpu { - -struct detection_output_gpu : typed_primitive_gpu_impl { - using parent = typed_primitive_gpu_impl; - using parent::parent; - -private: - static void setDetectOutSpecificParams(kernel_selector::detection_output_params::DedicatedParams& detectOutParams, - const detection_output_node& arg) { - auto primitive = arg.get_primitive(); - detectOutParams.keep_top_k = primitive->keep_top_k; - detectOutParams.num_classes = primitive->num_classes; - detectOutParams.top_k = primitive->top_k; - detectOutParams.background_label_id = primitive->background_label_id; - detectOutParams.code_type = (int32_t)primitive->code_type; - detectOutParams.share_location = primitive->share_location; - detectOutParams.variance_encoded_in_target = primitive->variance_encoded_in_target; - detectOutParams.nms_threshold = primitive->nms_threshold; - detectOutParams.eta = primitive->eta; - detectOutParams.confidence_threshold = primitive->confidence_threshold; - detectOutParams.prior_coordinates_offset = primitive->prior_coordinates_offset; - detectOutParams.prior_info_size = primitive->prior_info_size; - detectOutParams.prior_is_normalized = primitive->prior_is_normalized; - detectOutParams.input_width = primitive->input_width; - detectOutParams.input_heigh = primitive->input_height; - detectOutParams.conf_size_x = arg.confidence().get_output_layout().get_buffer_size().spatial[0]; - detectOutParams.conf_size_y = arg.confidence().get_output_layout().get_buffer_size().spatial[1]; - detectOutParams.conf_padding_x = arg.confidence().get_output_layout().data_padding.lower_size().spatial[0]; - detectOutParams.conf_padding_y = arg.confidence().get_output_layout().data_padding.lower_size().spatial[1]; - } - -public: - static primitive_impl* create(const detection_output_node& arg) { - if (!arg.get_program().get_options().get()->enabled()) { - return runDetectOutCpu(arg); - } - - auto detect_out_params = get_default_params(arg); - auto detect_out_optional_params = - get_default_optional_params(arg.get_program()); - - setDetectOutSpecificParams(detect_out_params.detectOutParams, arg); - - auto& kernel_selector = kernel_selector::detection_output_kernel_selector::Instance(); - auto best_kernels = kernel_selector.GetBestKernels(detect_out_params, detect_out_optional_params); - - CLDNN_ERROR_BOOL(arg.id(), - "Best_kernel.empty()", - best_kernels.empty(), - "Cannot find a proper kernel with this arguments"); - - auto detect_out = new detection_output_gpu(arg, best_kernels[0]); - - return detect_out; - } -}; - -primitive_impl* runDetectOutGpu(const detection_output_node& arg, kernel_selector::KernelData kernel) { - return new detection_output_gpu(arg, kernel); -} - -/************************ Detection Output keep_top_k part ************************/ - -struct detection_output_sort_gpu : typed_primitive_gpu_impl { - using parent = typed_primitive_gpu_impl; - using parent::parent; - -private: - static void setDetectOutSpecificParams(kernel_selector::detection_output_params::DedicatedParams& detectOutParams, - const detection_output_sort_node& arg) { - if (arg.get_dependency(0).is_type()) { - auto primitive = arg.get_dependency(0).as().get_primitive(); - detectOutParams.keep_top_k = primitive->keep_top_k; - detectOutParams.num_classes = primitive->num_classes; - detectOutParams.num_images = - arg.get_dependency(0).as().location().get_output_layout().size.batch[0]; - detectOutParams.top_k = primitive->top_k; - detectOutParams.share_location = primitive->share_location; - detectOutParams.background_label_id = primitive->background_label_id; - } else { - auto primitive = arg.get_primitive(); - detectOutParams.keep_top_k = primitive->keep_top_k; - detectOutParams.num_classes = primitive->num_classes; - detectOutParams.num_images = primitive->num_images; - detectOutParams.top_k = primitive->top_k; - detectOutParams.share_location = primitive->share_location; - detectOutParams.background_label_id = primitive->background_label_id; - } - } - -public: - static primitive_impl* create(const detection_output_sort_node& arg) { - auto detect_out_params = get_default_params(arg); - auto detect_out_optional_params = - get_default_optional_params(arg.get_program()); - - setDetectOutSpecificParams(detect_out_params.detectOutParams, arg); - - auto& kernel_selector = kernel_selector::detection_output_sort_kernel_selector::Instance(); - auto best_kernels = kernel_selector.GetBestKernels(detect_out_params, detect_out_optional_params); - - CLDNN_ERROR_BOOL(arg.id(), - "Best_kernel.empty()", - best_kernels.empty(), - "Cannot find a proper kernel with this arguments"); - - auto detect_out = new detection_output_sort_gpu(arg, best_kernels[0]); - - return detect_out; - } -}; - -primitive_impl* runDetectOutSortGpu(const detection_output_sort_node& arg, kernel_selector::KernelData kernel) { - return new detection_output_sort_gpu(arg, kernel); -} - -namespace detail { - -attach_detection_output_gpu::attach_detection_output_gpu() { - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), - detection_output_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), - detection_output_gpu::create); - implementation_map::add( - std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), - detection_output_sort_gpu::create); - implementation_map::add( - std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), - detection_output_sort_gpu::create); -} - -} // namespace detail -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/graph_initializations.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/graph_initializations.cpp index 4a15d93b48c..000ba5dc279 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/graph_initializations.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/graph_initializations.cpp @@ -120,56 +120,6 @@ void graph_initializations::replace_nodes(program_impl& p) { } } -void graph_initializations::handle_detection_output(program_impl& p) { - auto itr = p.nodes_map.begin(); // note we need to use iterators since currently processed element can be removed - while (itr != p.nodes_map.end()) { - auto node_itr = itr++; - auto& node = *(*node_itr).second; - // Create second part detection output primitive and replace nodes names - do it only once - if ((p.get_options().get()->enabled()) && - (node.is_type()) && - (node.id().find("_pre") == - std::string::npos)) { // ToDo: this will fail if user will name the primitive with using _pre like do_pre - // we need to use node mark() or some other idea to prevent it - // rename detection output - const primitive_id detect_out_node_name = node.id(); - const primitive_id new_primitive_id = detect_out_node_name + "_pre"; - p.rename(node, new_primitive_id); - - auto detect_out_prim = node.as().typed_desc(); - // Create new primitive, "keep top k" part of detection output - // ToDo: add a default parameters to the detection_output_sort class constructor to get rid off this - // initialization from here - auto detect_out_sort_prim = - std::make_shared(detect_out_node_name, - node.id(), - // not important params here - it will be set during - // "primitive_impl* create" func in "detection_output_sort_gpu" - 0, // num_images - 0, // num_classes - 0, // keep_top_k - false, // share_location - 0, // top_k - -1, // background_label_id - detect_out_prim->output_padding); - - p.get_or_create(detect_out_sort_prim); - - auto sort_node_itr = p.nodes_map.find(detect_out_node_name); - if (sort_node_itr == p.nodes_map.end()) continue; - - auto sort_node = sort_node_itr->second; - - // Add connection to second part of detection output - if (node.get_users().size()) { - p.add_intermediate(*sort_node, *(node.get_users().front()), 0, false); - } else { - p.add_connection(node, *sort_node); - } - } - } -} - void graph_initializations::handle_lstm(program_impl& p) { bool has_lstm_children; auto itr = p.nodes_map.begin(); // note we need to use iterators since currently processed element can be removed @@ -528,7 +478,6 @@ void graph_initializations::set_outputs(program_impl& p) { void graph_initializations::run(program_impl& p) { replace_nodes(p); - handle_detection_output(p); handle_lstm(p); handle_dynamic_lstm(p); set_outputs(p); diff --git a/inference-engine/thirdparty/clDNN/src/include/detection_output_inst.h b/inference-engine/thirdparty/clDNN/src/include/detection_output_inst.h index 9e495e0d956..9d91778f96e 100644 --- a/inference-engine/thirdparty/clDNN/src/include/detection_output_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/detection_output_inst.h @@ -1,5 +1,5 @@ /* -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -60,36 +60,4 @@ public: using detection_output_inst = typed_primitive_inst; -template <> -class typed_program_node : public typed_program_node_base { - using parent = typed_program_node_base; - -public: - using parent::parent; - - program_node& input() const { return get_dependency(0); } -}; - -using detection_output_sort_node = typed_program_node; - -template <> -class typed_primitive_inst : public typed_primitive_inst_base { - using parent = typed_primitive_inst_base; - -public: - static layout calc_output_layout(detection_output_sort_node const& node); - static std::string to_string(detection_output_sort_node const& node); - -public: - typed_primitive_inst(network_impl& network, detection_output_sort_node const& node); -}; - -using detection_output_sort_inst = typed_primitive_inst; - -namespace gpu { -primitive_impl* runDetectOutCpu(const detection_output_node& arg); -primitive_impl* runDetectOutGpu(const detection_output_node& arg, kernel_selector::KernelData kernel); -primitive_impl* runDetectOutSortGpu(const detection_output_sort_node& arg, kernel_selector::KernelData kernel); -} // namespace gpu - -} // namespace cldnn \ No newline at end of file +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/include/pass_manager.h b/inference-engine/thirdparty/clDNN/src/include/pass_manager.h index bc620bf43da..034109d3b3f 100644 --- a/inference-engine/thirdparty/clDNN/src/include/pass_manager.h +++ b/inference-engine/thirdparty/clDNN/src/include/pass_manager.h @@ -1,5 +1,5 @@ /* -// Copyright (c) 2018-2019 Intel Corporation +// Copyright (c) 2018-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -115,7 +115,6 @@ public: private: void run(program_impl& p) override; void replace_nodes(program_impl& p); - void handle_detection_output(program_impl& p); void handle_lstm(program_impl& p); void handle_dynamic_lstm(program_impl& p); void set_outputs(program_impl& p); diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/detection_output_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/detection_output_test.cpp index 0f823c69d38..502cc171706 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/detection_output_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/detection_output_test.cpp @@ -1,5 +1,5 @@ /* -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -24,8 +24,7 @@ #include #include "test_utils/test_utils.h" -namespace cldnn -{ +namespace cldnn { template<> struct type_to_data_type { static const data_types value = data_types::f16; }; } @@ -33,18 +32,15 @@ using namespace cldnn; using namespace tests; template -class detection_output_test : public ::testing::Test -{ +class detection_output_test : public ::testing::Test { public: detection_output_test() : - nms_threshold(0.1f) - {} + nms_threshold(0.1f) {} void init_buffers(cldnn::memory prior_memory, cldnn::memory confidence_memory, cldnn::memory location_memory, bool share_location, bool variance_encoded_in_target = false, - int prior_info_size = 4, int prior_coordinates_offset = 0, bool prior_is_normalized = true) - { + int prior_info_size = 4, int prior_coordinates_offset = 0, bool prior_is_normalized = true) { auto location_ptr = location_memory.pointer(); auto confidence_ptr = confidence_memory.pointer(); auto prior_box_ptr = prior_memory.pointer(); @@ -59,11 +55,9 @@ public: const float prior_multiplier = prior_is_normalized ? 1.0f : static_cast(this->img_size); const float variance = 0.1f; int idx = 0; - for (int h = 0; h < 2; ++h) - { + for (int h = 0; h < 2; ++h) { float center_y = (h + 0.5f) * step; - for (int w = 0; w < 2; ++w) - { + for (int w = 0; w < 2; ++w) { float center_x = (w + 0.5f) * step; prior_data[idx+prior_coordinates_offset+0] = (center_x - box_size / 2) * prior_multiplier; prior_data[idx+prior_coordinates_offset+1] = (center_y - box_size / 2) * prior_multiplier; @@ -73,28 +67,20 @@ public: idx += prior_info_size; } } - if (!variance_encoded_in_target) - { - for (int i = 0; i < idx; ++i) - { + if (!variance_encoded_in_target) { + for (int i = 0; i < idx; ++i) { prior_data[idx + i] = variance; } } // Fill confidences. idx = 0; - for (int i = 0; i < num_of_images; ++i) - { - for (int j = 0; j < num_priors; ++j) - { - for (int c = 0; c < num_classes; ++c) - { - if (i % 2 == c % 2) - { + for (int i = 0; i < num_of_images; ++i) { + for (int j = 0; j < num_priors; ++j) { + for (int c = 0; c < num_classes; ++c) { + if (i % 2 == c % 2) { confidence_data[idx++] = j * 0.2f; - } - else - { + } else { confidence_data[idx++] = 1 - j * 0.2f; } } @@ -105,14 +91,10 @@ public: const int num_loc_classes = share_location ? 1 : num_classes; const float loc_multiplier = variance_encoded_in_target ? variance : 1.0f; idx = 0; - for (int i = 0; i < num_of_images; ++i) - { - for (int h = 0; h < 2; ++h) - { - for (int w = 0; w < 2; ++w) - { - for (int c = 0; c < num_loc_classes; ++c) - { + for (int i = 0; i < num_of_images; ++i) { + for (int h = 0; h < 2; ++h) { + for (int w = 0; w < 2; ++w) { + for (int c = 0; c < num_loc_classes; ++c) { location_data[idx++] = (w % 2 ? -1 : 1) * (i * 1 + c / 2.f + 0.5f) * loc_multiplier; location_data[idx++] = (h % 2 ? -1 : 1) * (i * 1 + c / 2.f + 0.5f) * loc_multiplier; location_data[idx++] = (w % 2 ? -1 : 1) * (i * 1 + c / 2.f + 0.5f) * loc_multiplier; @@ -123,8 +105,7 @@ public: } } - void init_buffer_sort(cldnn::memory input_buff) - { + void init_buffer_sort(cldnn::memory input_buff) { auto input_data_ptr = input_buff.pointer(); EXPECT_EQ((int)input_buff.count(), 128); @@ -150,8 +131,7 @@ public: input_data[121] = -1; input_data[122] = 0; input_data[123] = 0; input_data[124] = 0; input_data[125] = 0; input_data[126] = 0; input_data[127] = 0; } - void check_results(const memory& output, const int num, const std::string values) - { + void check_results(const memory& output, const int num, const std::string values) { assert(num < output.get_layout().size.spatial[1]); // Split values to vector of items. @@ -163,18 +143,15 @@ public: // Check data. auto out_ptr = output.pointer(); const T* data = out_ptr.data(); - for (int i = 0; i < 2; ++i) - { + for (int i = 0; i < 2; ++i) { EXPECT_EQ(static_cast((float)data[num * output.get_layout().size.spatial[0] + i]), atoi(items[i].c_str())); } - for (int i = 2; i < 7; ++i) - { + for (int i = 2; i < 7; ++i) { EXPECT_TRUE(floating_point_equal(data[num * output.get_layout().size.spatial[0] + i], (T)(float)atof(items[i].c_str()))); } } - void setup_basic(bool runOnGPU) - { + void setup_basic() { const bool share_location = true; const int num_loc_classes = share_location ? 1 : this->num_classes; const int keep_top_k = 150; @@ -192,11 +169,6 @@ public: topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k)); build_options opts; - if (runOnGPU) - { - opts.set_option(build_option::detection_output_gpu(true)); - } - network network(engine, topology, opts); network.set_input_data("input_location", input_location); network.set_input_data("input_confidence", input_confidence); @@ -213,8 +185,7 @@ public: EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7); } - void setup_two_layers(bool runOnGPU) - { + void setup_two_layers() { const bool share_location = true; const int num_loc_classes = share_location ? 1 : this->num_classes; const int keep_top_k = 150; @@ -233,11 +204,6 @@ public: topology.add(detection_output("detection_output_2", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k)); build_options opts; - if (runOnGPU) - { - opts.set_option(build_option::detection_output_gpu(true)); - } - network network(engine, topology, opts); network.set_input_data("input_location", input_location); network.set_input_data("input_confidence", input_confidence); @@ -247,8 +213,7 @@ public: EXPECT_EQ(outputs.size(), size_t(2)); unsigned i = 1; - for (auto it = outputs.begin(); it != outputs.begin(); it++) - { + for (auto it = outputs.begin(); it != outputs.begin(); it++) { EXPECT_EQ(it->first, "detection_output_" + std::to_string(i)); @@ -260,8 +225,7 @@ public: } } - void forward_share_location(bool runOnGPU) - { + void forward_share_location() { const bool share_location = true; const int num_loc_classes = share_location ? 1 : this->num_classes; const int keep_top_k = 4; @@ -282,11 +246,6 @@ public: topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold)); build_options opts; - if (runOnGPU) - { - opts.set_option(build_option::detection_output_gpu(true)); - } - network network(engine, topology, opts); network.set_input_data("input_location", input_location); network.set_input_data("input_confidence", input_confidence); @@ -314,8 +273,7 @@ public: check_results(output_prim, 7, "-1 0 0 0 0 0 0"); } - void forward_num_detections_greater_than_keep_top_k(bool runOnGPU) - { + void forward_num_detections_greater_than_keep_top_k() { const bool share_location = true; const int num_loc_classes = share_location ? 1 : this->num_classes; const int keep_top_k = 1; @@ -336,11 +294,6 @@ public: topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold)); build_options opts; - if (runOnGPU) - { - opts.set_option(build_option::detection_output_gpu(true)); - } - network network(engine, topology, opts); network.set_input_data("input_location", input_location); network.set_input_data("input_confidence", input_confidence); @@ -362,8 +315,7 @@ public: check_results(output_prim, 1, "1 1 0.6 0.45 0.45 0.75 0.75"); } - void forward_num_detections_smaller_than_keep_top_k(bool runOnGPU) - { + void forward_num_detections_smaller_than_keep_top_k() { const bool share_location = true; const int num_loc_classes = share_location ? 1 : this->num_classes; const int keep_top_k = 6; @@ -384,11 +336,6 @@ public: topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold)); build_options opts; - if (runOnGPU) - { - opts.set_option(build_option::detection_output_gpu(true)); - } - network network(engine, topology, opts); network.set_input_data("input_location", input_location); network.set_input_data("input_confidence", input_confidence); @@ -420,8 +367,7 @@ public: check_results(output_prim, 11, "-1 0 0 0 0 0 0"); } - void test_forward_share_location_top_k(bool runOnGPU) - { + void test_forward_share_location_top_k() { const bool share_location = true; const int num_loc_classes = share_location ? 1 : this->num_classes; const int keep_top_k = 2; @@ -443,11 +389,6 @@ public: topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k)); build_options opts; - if (runOnGPU) - { - opts.set_option(build_option::detection_output_gpu(true)); - } - network network(engine, topology, opts); network.set_input_data("input_location", input_location); network.set_input_data("input_confidence", input_confidence); @@ -471,8 +412,7 @@ public: check_results(output_prim, 3, "-1 0 0 0 0 0 0"); } - void forward_no_share_location(bool runOnGPU) - { + void forward_no_share_location() { const bool share_location = false; const int num_loc_classes = share_location ? 1 : this->num_classes; const int keep_top_k = 10; @@ -493,11 +433,6 @@ public: topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold)); build_options opts; - if (runOnGPU) - { - opts.set_option(build_option::detection_output_gpu(true)); - } - network network(engine, topology, opts); network.set_input_data("input_location", input_location); network.set_input_data("input_confidence", input_confidence); @@ -537,8 +472,7 @@ public: check_results(output_prim, 19, "-1 0 0 0 0 0 0"); } - void forward_no_share_location_top_k(bool runOnGPU) - { + void forward_no_share_location_top_k() { const bool share_location = false; const int num_loc_classes = share_location ? 1 : this->num_classes; const int keep_top_k = 4; @@ -560,11 +494,6 @@ public: topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k)); build_options opts; - if (runOnGPU) - { - opts.set_option(build_option::detection_output_gpu(true)); - } - network network(engine, topology, opts); network.set_input_data("input_location", input_location); network.set_input_data("input_confidence", input_confidence); @@ -592,8 +521,7 @@ public: check_results(output_prim, 7, "-1 0 0 0 0 0 0"); } - void forward_no_share_location_neg_0(bool runOnGPU) - { + void forward_no_share_location_neg_0() { const bool share_location = false; const int num_loc_classes = share_location ? 1 : this->num_classes; const int keep_top_k = 5; @@ -614,11 +542,6 @@ public: topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold)); build_options opts; - if (runOnGPU) - { - opts.set_option(build_option::detection_output_gpu(true)); - } - network network(engine, topology, opts); network.set_input_data("input_location", input_location); network.set_input_data("input_confidence", input_confidence); @@ -648,8 +571,7 @@ public: check_results(output_prim, 9, "-1 0 0 0 0 0 0"); } - void forward_no_share_location_neg_0_top_k(bool runOnGPU) - { + void forward_no_share_location_neg_0_top_k() { const bool share_location = false; const int num_loc_classes = share_location ? 1 : this->num_classes; const int keep_top_k = 2; @@ -671,11 +593,6 @@ public: topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k)); build_options opts; - if (runOnGPU) - { - opts.set_option(build_option::detection_output_gpu(true)); - } - network network(engine, topology, opts); network.set_input_data("input_location", input_location); network.set_input_data("input_confidence", input_confidence); @@ -699,8 +616,7 @@ public: check_results(output_prim, 3, "-1 0 0 0 0 0 0"); } - void forward_no_share_location_top_k_input_padding(bool runOnGPU) - { + void forward_no_share_location_top_k_input_padding() { const bool share_location = false; const int num_loc_classes = share_location ? 1 : this->num_classes; const int keep_top_k = 4; @@ -723,11 +639,6 @@ public: topology.add(detection_output("detection_output", "input_location_padded", "input_confidence_padded", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k)); build_options opts; - if (runOnGPU) - { - opts.set_option(build_option::detection_output_gpu(true)); - } - network network(engine, topology, opts); network.set_input_data("input_location", input_location); network.set_input_data("input_confidence", input_confidence); @@ -755,8 +666,7 @@ public: check_results(output_prim, 7, "-1 0 0 0 0 0 0"); } - void test_forward_no_share_location_top_k_faster_rcnn_case(bool runOnGPU) - { + void test_forward_no_share_location_top_k_faster_rcnn_case() { const bool share_location = false; const int num_loc_classes = share_location ? 1 : this->num_classes; const int keep_top_k = 4; @@ -792,11 +702,6 @@ public: )); build_options opts; - if (runOnGPU) - { - opts.set_option(build_option::detection_output_gpu(true)); - } - network network(engine, topology, opts); network.set_input_data("input_location", input_location); network.set_input_data("input_confidence", input_confidence); @@ -834,181 +739,50 @@ public: typedef ::testing::Types detection_output_test_types; TYPED_TEST_CASE(detection_output_test, detection_output_test_types); -TYPED_TEST(detection_output_test, test_setup_basic) -{ - this->setup_basic(false); +TYPED_TEST(detection_output_test, test_setup_basic) { + this->setup_basic(); } -TYPED_TEST(detection_output_test, test_setup_basic_gpu) -{ - this->setup_basic(true); +TYPED_TEST(detection_output_test, test_setup_two_layers) { + this->setup_two_layers(); } -TYPED_TEST(detection_output_test, test_setup_two_layers) -{ - this->setup_two_layers(false); +TYPED_TEST(detection_output_test, test_forward_share_location) { + this->forward_share_location(); } -TYPED_TEST(detection_output_test, test_setup_two_layers_gpu) -{ - this->setup_two_layers(true); +TYPED_TEST(detection_output_test, test_forward_num_detections_greater_than_keep_top_k) { + this->forward_num_detections_greater_than_keep_top_k(); } -TYPED_TEST(detection_output_test, test_forward_share_location) -{ - this->forward_share_location(false); +TYPED_TEST(detection_output_test, test_forward_num_detections_smaller_than_keep_top_k) { + this->forward_num_detections_smaller_than_keep_top_k(); } -TYPED_TEST(detection_output_test, DISABLED_test_forward_share_location_gpu) -{ - this->forward_share_location(true); +TYPED_TEST(detection_output_test, test_forward_share_location_top_k) { + this->test_forward_share_location_top_k(); } -TYPED_TEST(detection_output_test, test_forward_num_detections_greater_than_keep_top_k) -{ - this->forward_num_detections_greater_than_keep_top_k(false); +TYPED_TEST(detection_output_test, test_forward_no_share_location) { + this->forward_no_share_location(); } -TYPED_TEST(detection_output_test, test_forward_num_detections_greater_than_keep_top_k_gpu) -{ - this->forward_num_detections_greater_than_keep_top_k(true); +TYPED_TEST(detection_output_test, test_forward_no_share_location_top_k) { + this->forward_no_share_location_top_k(); } -TYPED_TEST(detection_output_test, test_forward_num_detections_smaller_than_keep_top_k) -{ - this->forward_num_detections_smaller_than_keep_top_k(false); +TYPED_TEST(detection_output_test, test_forward_no_share_location_neg_0) { + this->forward_no_share_location_neg_0(); } -TYPED_TEST(detection_output_test, DISABLED_test_forward_num_detections_smaller_than_keep_top_k_gpu) -{ - this->forward_num_detections_smaller_than_keep_top_k(true); +TYPED_TEST(detection_output_test, test_forward_no_share_location_neg_0_top_k) { + this->forward_no_share_location_neg_0_top_k(); } -TYPED_TEST(detection_output_test, test_forward_share_location_top_k) -{ - this->test_forward_share_location_top_k(false); +TYPED_TEST(detection_output_test, test_forward_no_share_location_top_k_input_padding) { + this->forward_no_share_location_top_k_input_padding(); } -TYPED_TEST(detection_output_test, test_forward_share_location_top_k_gpu) -{ - this->test_forward_share_location_top_k(true); +TYPED_TEST(detection_output_test, test_forward_no_share_location_top_k_faster_rcnn_case) { + this->test_forward_no_share_location_top_k_faster_rcnn_case(); } - -TYPED_TEST(detection_output_test, test_forward_no_share_location) -{ - this->forward_no_share_location(false); -} - -TYPED_TEST(detection_output_test, test_forward_no_share_location_gpu) -{ - this->forward_no_share_location(true); -} - -TYPED_TEST(detection_output_test, test_forward_no_share_location_top_k) -{ - this->forward_no_share_location_top_k(false); -} - -TYPED_TEST(detection_output_test, test_forward_no_share_location_top_k_gpu) -{ - this->forward_no_share_location_top_k(true); -} - -TYPED_TEST(detection_output_test, test_forward_no_share_location_neg_0) -{ - this->forward_no_share_location_neg_0(false); -} - -TYPED_TEST(detection_output_test, test_forward_no_share_location_neg_0_gpu) -{ - this->forward_no_share_location_neg_0(true); -} - -TYPED_TEST(detection_output_test, test_forward_no_share_location_neg_0_top_k) -{ - this->forward_no_share_location_neg_0_top_k(false); -} - -TYPED_TEST(detection_output_test, test_forward_no_share_location_neg_0_top_k_gpu) -{ - this->forward_no_share_location_neg_0_top_k(true); -} - -TYPED_TEST(detection_output_test, test_forward_no_share_location_top_k_input_padding) -{ - this->forward_no_share_location_top_k_input_padding(false); -} - -TYPED_TEST(detection_output_test, test_forward_no_share_location_top_k_input_padding_gpu) -{ - this->forward_no_share_location_top_k_input_padding(true); -} - -TYPED_TEST(detection_output_test, test_forward_no_share_location_top_k_faster_rcnn_case) -{ - this->test_forward_no_share_location_top_k_faster_rcnn_case(false); -} - -TYPED_TEST(detection_output_test, test_forward_no_share_location_top_k_faster_rcnn_case_gpu) -{ - this->test_forward_no_share_location_top_k_faster_rcnn_case(true); -} - -TYPED_TEST(detection_output_test, test_detection_output_sort_gpu) -{ - const bool share_location = false; - const int num_loc_classes = share_location ? 1 : this->num_classes; - const int keep_top_k = 10; - const int background_label_id = -1; - const int top_k = -1; - - const unsigned out_row_size = 7; - const unsigned score_space = ((this->num_of_images + 15) / 16) * 16; - int input_size = this->num_of_images * num_loc_classes * this->num_priors * out_row_size + score_space; - - const auto& engine = get_test_engine(); - cldnn::memory input_buff = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ 1, 1, 1, input_size } }); - - this->init_buffer_sort(input_buff); - - topology topology; - topology.add(input_layout("input_location", input_buff.get_layout())); - - topology.add(detection_output_sort("detection_output_sort", "input_location", this->num_of_images, this->num_classes, keep_top_k, share_location, top_k, background_label_id)); - network network(engine, topology); - network.set_input_data("input_location", input_buff); - - auto outputs = network.execute(); - - EXPECT_EQ(outputs.size(), size_t(1)); - EXPECT_EQ(outputs.begin()->first, "detection_output_sort"); - - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7); - - auto output_prim = outputs.begin()->second.get_memory(); - - this->check_results(output_prim, 0, "0 0 0.6 0.55 0.55 0.85 0.85"); - this->check_results(output_prim, 1, "0 0 0.4 0.15 0.55 0.45 0.85"); - this->check_results(output_prim, 2, "0 0 0.2 0.55 0.15 0.85 0.45"); - this->check_results(output_prim, 3, "0 0 0.0 0.15 0.15 0.45 0.45"); - this->check_results(output_prim, 4, "0 1 1.0 0.20 0.20 0.50 0.50"); - this->check_results(output_prim, 5, "0 1 0.8 0.50 0.20 0.80 0.50"); - this->check_results(output_prim, 6, "0 1 0.6 0.20 0.50 0.50 0.80"); - this->check_results(output_prim, 7, "0 1 0.4 0.50 0.50 0.80 0.80"); - this->check_results(output_prim, 8, "1 0 1.0 0.25 0.25 0.55 0.55"); - this->check_results(output_prim, 9, "1 0 0.4 0.45 0.45 0.75 0.75"); - this->check_results(output_prim, 10, "1 1 0.6 0.40 0.40 0.70 0.70"); - this->check_results(output_prim, 11, "-1 0 0 0 0 0 0"); - this->check_results(output_prim, 12, "-1 0 0 0 0 0 0"); - this->check_results(output_prim, 13, "-1 0 0 0 0 0 0"); - this->check_results(output_prim, 14, "-1 0 0 0 0 0 0"); - this->check_results(output_prim, 15, "-1 0 0 0 0 0 0"); - this->check_results(output_prim, 16, "-1 0 0 0 0 0 0"); - this->check_results(output_prim, 17, "-1 0 0 0 0 0 0"); - this->check_results(output_prim, 18, "-1 0 0 0 0 0 0"); - this->check_results(output_prim, 19, "-1 0 0 0 0 0 0"); -} -