diff --git a/inference-engine/src/cldnn_engine/cldnn_program.cpp b/inference-engine/src/cldnn_engine/cldnn_program.cpp
index 1ccef5a03a3..ec846cf19b0 100644
--- a/inference-engine/src/cldnn_engine/cldnn_program.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_program.cpp
@@ -3755,6 +3755,8 @@ void Program::CreateGatherPrimitive(cldnn::topology& topology, InferenceEngine::
         }
     };
 
+    auto gatherLayerName = layer_type_name_ID(layer);
+
     std::vector<cldnn::primitive_id> reorderedInputs;
     reorderedInputs.resize(inputPrimitives.size());
 
@@ -3771,23 +3773,134 @@ void Program::CreateGatherPrimitive(cldnn::topology& topology, InferenceEngine::
                 targetFormat,
                 cldnn::data_types::i32);
             topology.add(preprocessPrim);
-            AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(layer), layer);
-            reorderedInputs[portIndex] = (reorderPrimName);
+            AddInnerPrimitiveToProfiler(reorderPrimName, gatherLayerName, layer);
+            reorderedInputs[portIndex] = reorderPrimName;
         } else {
             reorderedInputs[portIndex] = inputPrimitives[portIndex];
         }
     }
 
-    std::string gatherLayerName = layer_type_name_ID(layer);
+    auto indicesDims = layer->insData[1].lock()->getTensorDesc().getDims();
+    auto indicesLayout = layer->insData[1].lock()->getTensorDesc().getLayout();
+    auto indicesFormat = FormatFromLayout(indicesLayout);
+
+    auto inputDims = layer->insData[0].lock()->getTensorDesc().getDims();
+    auto inputLayout = layer->insData[0].lock()->getTensorDesc().getLayout();
+    auto inputFormat = FormatFromLayout(inputLayout);
+
+    auto outDimsOriginal = layer->outData[0]->getTensorDesc().getDims();
+    auto outputLayoutOriginal = layer->outData[0]->getTensorDesc().getLayout();
+    auto outputFormatOriginal = FormatFromLayout(outputLayoutOriginal);
+
+    auto outDims = outDimsOriginal;
+    auto targetDatatype = DataTypeFromPrecision(layer->precision);
+
+    auto nonNegativeAxis = (axis >= 0) ? axis : axis + 3;
+
+    // the following vector is needed just to check whether we can apply the bfyx WA
+    SizeVector originalRequiredDims;
+    for (size_t d = 0; d < inputDims.size(); d++) {
+        if ((d == nonNegativeAxis) || (inputDims[d] > 1)) {
+            originalRequiredDims.push_back(d);
+        }
+    }
+
+    if (originalRequiredDims.size() < 4) {
+        // make sure that we will have at least 4 required dimensions
+        auto originalAxesIt = originalRequiredDims.begin();
+        for (size_t i = 0; i < 4; i++) {
+            int dimFoundAtIndex = -1;
+            for (size_t j = 0; j < originalRequiredDims.size(); j++) {
+                if (originalRequiredDims[j] == i) {
+                    dimFoundAtIndex = j;
+                }
+            }
+            if (dimFoundAtIndex == -1) {
+                originalAxesIt = originalRequiredDims.insert(originalAxesIt, i);
+            }
+            originalAxesIt++;
+        }
+    }
+
+    // the clDNN primitive is missing proper support of 5d/6d inputs,
+    // but we can still fall back to the bfyx format in some cases
+    bool bfyx_wa = ((inputFormat == cldnn::format::bfzyx || inputFormat == cldnn::format::bfwzyx) &&
+                    (originalRequiredDims.size() == 4) &&
+                    (indicesFormat == cldnn::format::bfyx));
+
+    if (bfyx_wa) {
+        if (indicesDims.size() > 1) {
+            // reshape the indices dims to 1D (along batch axis)
+            size_t indDimAcc = std::accumulate(indicesDims.begin(), indicesDims.end(), 1, std::multiplies<size_t>());
+            SizeVector targetIndDims{ indDimAcc, 1, 1, 1 };
+
+            auto reshapeName = reorderedInputs[1] + "_" + layer->name + "_reshape";
+            auto targetTensor = CldnnTensorFromIEDims(targetIndDims);
+            auto reshapePrim = cldnn::reshape(reshapeName, reorderedInputs[1], targetTensor);
+            topology.add(reshapePrim);
+            AddInnerPrimitiveToProfiler(reshapeName, gatherLayerName, layer);
+            reorderedInputs[1] = reshapeName;
+
+            // adjust expected output dims
+            outDims[nonNegativeAxis] = indDimAcc;
+            outDims.erase(outDims.begin() + nonNegativeAxis + 1, outDims.begin() + nonNegativeAxis + indicesDims.size());
+        }
+
+        // reorder input to bfyx
+        auto reorderName = reorderedInputs[0] + "_" + layer->name + "_format_reorder";
+        auto reorderPrim = cldnn::reorder(reorderName, reorderedInputs[0], cldnn::format::bfyx, targetDatatype);
+        topology.add(reorderPrim);
+        AddInnerPrimitiveToProfiler(reorderName, gatherLayerName, layer);
+        reorderedInputs[0] = reorderName;
+
+        // calculate new input/output dims in bfyx format
+        SizeVector targetInDims(4);
+        SizeVector targetOutDims(4);
+        for (size_t d = 0; d < 4; d++) {
+            targetInDims[d] = inputDims[originalRequiredDims[d]];
+            targetOutDims[d] = outDims[originalRequiredDims[d]];
+        }
+        outDims = targetOutDims;
+
+        // calculate new axis in bfyx format
+        for (size_t d = 0; d < originalRequiredDims.size(); d++) {
+            if (originalRequiredDims[d] == nonNegativeAxis) {
+                axis = d;
+            }
+        }
+
+        // reshape the input dims to the ones expected in bfyx format
+        auto reshapeName = reorderedInputs[0] + "_" + layer->name + "_reshape";
+        auto targetTensor = CldnnTensorFromIEDims(targetInDims);
+        auto reshapePrim = cldnn::reshape(reshapeName, reorderedInputs[0], targetTensor);
+        topology.add(reshapePrim);
+        AddInnerPrimitiveToProfiler(reshapeName, gatherLayerName, layer);
+        reorderedInputs[0] = reshapeName;
+    }
+
     auto gatherPrim = cldnn::gather(
-            gatherLayerName,
-            reorderedInputs[0],
-            reorderedInputs[1],
-            cldnnAxisFromIE(axis),
-            CldnnTensorFromIEDims(gatherLayer->outData[0]->getTensorDesc().getDims()));
+        gatherLayerName,
+        reorderedInputs[0],
+        reorderedInputs[1],
+        cldnnAxisFromIE(axis),
+        CldnnTensorFromIEDims(outDims));
 
     topology.add(gatherPrim);
     AddPrimitiveToProfiler(gatherLayerName, layer);
+
+    if (bfyx_wa) {
+        // reorder output back to original format
+        auto reorderName = gatherLayerName + "_" + layer->name + "_format_reorder";
+        auto reorderPrim = cldnn::reorder(reorderName, gatherPrim, outputFormatOriginal, targetDatatype);
+        topology.add(reorderPrim);
+        AddInnerPrimitiveToProfiler(reorderName, gatherLayerName, layer);
+
+        // reshape output back to original dims
+        auto reshapeName = gatherLayerName + "_" + layer->name + "_reshape";
+        auto reshapePrim = cldnn::reshape(reshapeName, reorderName, CldnnTensorFromIEDims(outDimsOriginal));
+        topology.add(reshapePrim);
+        AddInnerPrimitiveToProfiler(reshapeName, gatherLayerName, layer);
+    }
 }
 
 void CLDNNPlugin::Program::CreateGatherTreePrimitive(cldnn::topology & topology, InferenceEngine::CNNLayerPtr & layer) {
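
For reviewers, a minimal standalone sketch of the dimension-squeezing idea behind the bfyx WA: keep the gather axis plus every dimension with extent > 1, pad to four surviving dimensions, and remap the axis. All names here (`requiredDims`, the local `SizeVector` alias) are illustrative, not plugin code, and the padding keeps the list sorted, which is what the loop in the diff intends:

```cpp
// Squeeze a 5D/6D shape down to the four dimensions the bfyx WA keeps
// and remap the gather axis accordingly (illustrative sketch only).
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

using SizeVector = std::vector<size_t>;  // stands in for InferenceEngine::SizeVector

// Keep the gather axis itself plus every dimension with extent > 1,
// then pad with the lowest missing indices until four dimensions remain.
SizeVector requiredDims(const SizeVector& dims, size_t axis) {
    SizeVector required;
    for (size_t d = 0; d < dims.size(); d++) {
        if (d == axis || dims[d] > 1)
            required.push_back(d);
    }
    for (size_t i = 0; i < 4 && required.size() < 4; i++) {
        if (std::find(required.begin(), required.end(), i) == required.end())
            required.insert(std::lower_bound(required.begin(), required.end(), i), i);
    }
    return required;
}

int main() {
    SizeVector in5d = {2, 3, 1, 4, 5};  // bfzyx shape with a trivial z dimension
    size_t axis = 3;

    SizeVector req = requiredDims(in5d, axis);  // {0, 1, 3, 4}

    SizeVector squeezed;  // the 4D (bfyx) shape fed to cldnn::gather
    size_t newAxis = 0;   // the gather axis after squeezing
    for (size_t d = 0; d < req.size(); d++) {
        squeezed.push_back(in5d[req[d]]);
        if (req[d] == axis)
            newAxis = d;
    }

    for (size_t v : squeezed) std::cout << v << ' ';              // prints: 2 3 4 5
    std::cout << "(axis " << axis << " -> " << newAxis << ")\n";  // axis 3 -> 2
    return 0;
}
```

If the squeeze cannot produce exactly four surviving dimensions, `bfyx_wa` stays false and the primitive keeps its original 5D/6D path.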
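The indices flattening can be checked in isolation the same way. A hedged sketch of just the shape arithmetic from the `indicesDims.size() > 1` branch (the example shapes are made up):

```cpp
// Flatten N-d gather indices to 1D and collapse the expected output dims
// to match, mirroring the erase() arithmetic in the diff above.
#include <cstddef>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

using SizeVector = std::vector<size_t>;

int main() {
    SizeVector indicesDims = {2, 2};          // 2D indices
    SizeVector outDims = {2, 3, 2, 2, 5, 6};  // gather output before the WA, axis == 2
    size_t axis = 2;

    // Total number of gathered entries once the indices are viewed as 1D.
    size_t indDimAcc = std::accumulate(indicesDims.begin(), indicesDims.end(),
                                       size_t{1}, std::multiplies<size_t>());

    // Replace the block of indices dims in the output shape by a single dim.
    outDims[axis] = indDimAcc;
    outDims.erase(outDims.begin() + axis + 1,
                  outDims.begin() + axis + indicesDims.size());

    for (size_t v : outDims) std::cout << v << ' ';  // prints: 2 3 4 5 6
    std::cout << '\n';
    return 0;
}
```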