[IE CLDNN] Use single primitive reduce when axis=1(feature) (#4548)
This commit is contained in:
@@ -134,11 +134,8 @@ cldnn::device_info clDNNEngine::GetDeviceInfo(const std::map<std::string, std::s
|
||||
// Predicate over a reduce-style node of type T: tries to view `node` as a
// `const T` and, if that succeeds, answers from the op's element type, input
// shape and reduction axes. Returns false when `node` is not a T.
//
// NOTE(review): this text looks like a rendered diff whose +/- markers were
// lost, so lines from the old and the new version of the function appear to be
// interleaved (hence the two back-to-back returns below). Presumably the
// axis-based lines are the removed side and `return !fp16_batch_not_1;` is the
// added side - confirm against the repository before relying on either one.
template<typename T>
|
||||
static bool disableReduceDecomposition(const std::shared_ptr<const ngraph::Node> node) {
|
||||
// The cast yields a null pointer (branch skipped) when node is not a T.
if (auto op = std::dynamic_pointer_cast<const T>(node)) {
|
||||
auto reduction_axes = op->get_reduction_axes().to_vector();
|
||||
// True when exactly one axis is reduced and that axis is index 1
// (the commit title refers to axis 1 as the feature axis).
bool reduce_along_f = op->get_reduction_axes().size() == 1 && std::count(reduction_axes.begin(), reduction_axes.end(), 1) != 0;
|
||||
// True for f16 ops whose first input's dimension 0 is not 1
// (presumably the batch dimension, going by the variable name).
bool fp16_batch_not_1 = op->get_element_type() == ngraph::element::f16 && op->input(0).get_shape()[0] != 1;
|
||||
bool can_use_reduce = !reduce_along_f && !fp16_batch_not_1;
|
||||
return can_use_reduce;
|
||||
// As written, the statement below can never execute: the return directly
// above it always leaves the function first.
return !fp16_batch_not_1;
|
||||
}
|
||||
// node is not of type T.
return false;
|
||||
}
|
||||
|
||||
@@ -22,6 +22,8 @@ ParamsKey ResampleKernelOpt::GetSupportedKey() const {
|
||||
ParamsKey k;
|
||||
k.EnableInputDataType(Datatype::F16);
|
||||
k.EnableInputDataType(Datatype::F32);
|
||||
k.EnableInputDataType(Datatype::UINT8);
|
||||
k.EnableInputDataType(Datatype::INT8);
|
||||
k.EnableOutputDataType(Datatype::F16);
|
||||
k.EnableOutputDataType(Datatype::F32);
|
||||
k.EnableOutputDataType(Datatype::UINT8);
|
||||
@@ -84,6 +86,11 @@ bool ResampleKernelOpt::Validate(const Params& p, const optional_params& o) cons
|
||||
|
||||
const auto& input = params.inputs[0];
|
||||
|
||||
if ((input.GetDType() == Datatype::UINT8 || input.GetDType() == Datatype::INT8) &&
|
||||
params.resampleType != ResampleType::NEAREST_NEIGHBOR &&
|
||||
params.resampleType != ResampleType::BILINEAR_INTERP)
|
||||
return false;
|
||||
|
||||
if (input.GetLayout() != DataLayout::fs_b_yx_fsv32 && input.GetLayout() != DataLayout::b_fs_yx_fsv16)
|
||||
return false;
|
||||
|
||||
|
||||
@@ -256,7 +256,7 @@ uint offset = batch_out * input_batch_pitch + ((feature_out + FSV - 1) / FSV) *
|
||||
for (uint yi = y_out; yi < y_max_val; ++yi) {
|
||||
for (uint xi = x_out; xi < x_max_val; ++xi) {
|
||||
INPUT_VEC input = (INPUT_VEC)(INPUT_INIT_VAL);
|
||||
#if (REDUCE_MAX_MODE || REDUCE_MIN_MODE || REDUCE_PROD_MODE || REDUCE_AND_MODE || REDUCE_LOG_SUM_EXP_MODE) && REDUCE_FEATURE && (INPUT0_FEATURE_NUM % FSV != 0)
|
||||
#if REDUCE_FEATURE && (INPUT0_FEATURE_NUM % FSV != 0)
|
||||
if (fi + FSV <= INPUT0_FEATURE_NUM)
|
||||
input = BLOCK_READ(data, offset);
|
||||
else
|
||||
@@ -273,7 +273,7 @@ uint offset = batch_out * input_batch_pitch + ((feature_out + FSV - 1) / FSV) *
|
||||
#if INPUT0_SIZE_X % READ_OFFSET != 0
|
||||
for (uint xi = x_leftover_start; xi < x_leftover_end; ++xi) {
|
||||
INPUT0_TYPE leftovers = INIT_VAL;
|
||||
#if (REDUCE_MAX_MODE || REDUCE_MIN_MODE || REDUCE_PROD_MODE || REDUCE_AND_MODE || REDUCE_LOG_SUM_EXP_MODE) && REDUCE_FEATURE && (INPUT0_FEATURE_NUM % FSV != 0)
|
||||
#if REDUCE_FEATURE && (INPUT0_FEATURE_NUM % FSV != 0)
|
||||
if (fi + FSV <= INPUT0_FEATURE_NUM)
|
||||
leftovers = DT_INPUT_BLOCK_READ(data, offset);
|
||||
else
|
||||
@@ -341,7 +341,7 @@ uint offset = batch_out * input_batch_pitch + ((feature_out + FSV - 1) / FSV) *
|
||||
for (uint yi = y_out; yi < y_max_val; ++yi) {
|
||||
for (uint xi = x_out; xi < x_max_val; ++xi) {
|
||||
INPUT_VEC input = (INPUT_VEC)(INPUT_INIT_VAL);
|
||||
#if (REDUCE_MAX_MODE || REDUCE_MIN_MODE || REDUCE_PROD_MODE || REDUCE_AND_MODE || REDUCE_LOG_SUM_EXP_MODE) && REDUCE_FEATURE && (INPUT0_FEATURE_NUM % FSV != 0)
|
||||
#if REDUCE_FEATURE && (INPUT0_FEATURE_NUM % FSV != 0)
|
||||
if (fi + FSV <= INPUT0_FEATURE_NUM)
|
||||
input = BLOCK_READ(data, offset);
|
||||
else
|
||||
|
||||
Reference in New Issue
Block a user