[GNA] Added SW_FP32 mode w/o SF for BasicLSTM (#10115)
* [GNA] Added SW_FP32 mode w/o SF for BasicLSTM * deleted additional test; added sw_fp32 mode for existing test; changed reference output for new mode * [GNA] Fixed according to review * [GNA] Parametrized weights range * fixed after review Co-authored-by: Mikhail Ryzhov <mikhail.ryzhov@intel.com>
This commit is contained in:
parent
bfa0e3e1a4
commit
0091d52c78
@ -19,6 +19,10 @@ const std::vector<std::map<std::string, std::string>> configs = {
|
||||
{
|
||||
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
|
||||
{"GNA_SCALE_FACTOR_0", "1638.4"}
|
||||
},
|
||||
{
|
||||
{"GNA_DEVICE_MODE", "GNA_SW_FP32"},
|
||||
{"GNA_SCALE_FACTOR_0", "1638.4"}
|
||||
}
|
||||
};
|
||||
|
||||
@ -31,6 +35,8 @@ size_t small_num_cells = 10;
|
||||
|
||||
size_t big_num_cells = 49;
|
||||
|
||||
std::pair<float, float> weights_range = {0.f, 0.02f};
|
||||
|
||||
const std::vector<bool> decompose = { false, true };
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_BasicLSTM, Basic_LSTM_S,
|
||||
@ -40,7 +46,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_BasicLSTM, Basic_LSTM_S,
|
||||
::testing::ValuesIn(configs),
|
||||
::testing::ValuesIn(size_params),
|
||||
::testing::Values(small_num_cells),
|
||||
::testing::ValuesIn(decompose)),
|
||||
::testing::ValuesIn(decompose),
|
||||
::testing::Values(weights_range)),
|
||||
Basic_LSTM_S::getTestCaseName);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_BasicLSTM_big_cells_num, Basic_LSTM_S,
|
||||
@ -50,6 +57,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_BasicLSTM_big_cells_num, Basic_LSTM_S,
|
||||
::testing::ValuesIn(configs),
|
||||
::testing::Values(size_params[0]),
|
||||
::testing::Values(big_num_cells),
|
||||
::testing::ValuesIn(decompose)),
|
||||
::testing::ValuesIn(decompose),
|
||||
::testing::Values(weights_range)),
|
||||
Basic_LSTM_S::getTestCaseName);
|
||||
} // namespace
|
||||
|
@ -43,15 +43,9 @@ TEST_P(Basic_LSTM_S, CompareWithRefImpl_LowLatencyTransformation) {
|
||||
|
||||
// Generate inputs
|
||||
GenerateInputs();
|
||||
|
||||
// Calculate References for the network before transformation passes
|
||||
auto referenceOutputs = CalculateRefs();
|
||||
|
||||
// Apply LowLatency and UnrollTensorIterator transformations
|
||||
ngraph::pass::Manager manager;
|
||||
manager.register_pass<ngraph::pass::LowLatency2>(); // LowLatency enables UnrollTI
|
||||
manager.run_passes(function);
|
||||
functionRefs = ngraph::clone_function(*function);
|
||||
LoadNetwork();
|
||||
auto referenceOutputs = CalculateRefs();
|
||||
auto states = inferRequest.QueryState();
|
||||
for (auto& state : states) {
|
||||
auto name = state.GetName();
|
||||
|
@ -21,7 +21,8 @@ typedef std::tuple<
|
||||
std::map<std::string, std::string>, // Configuration
|
||||
std::pair<size_t, size_t>, // Third dimenstion and hidden size
|
||||
size_t, // Number of Cells
|
||||
bool // Decompose LSTMCell
|
||||
bool, // Decompose LSTMCell
|
||||
std::pair<float, float> // Input and weights range
|
||||
> basicLstmParams;
|
||||
|
||||
class Basic_LSTM_S : public testing::WithParamInterface<basicLstmParams>,
|
||||
@ -33,6 +34,7 @@ public:
|
||||
static std::shared_ptr<ngraph::Function> GetNetwork(size_t thirdDimOut,
|
||||
size_t hiddenSize,
|
||||
size_t num_cells = 10,
|
||||
std::pair<float, float> weights_range = {0.f, 10.f},
|
||||
const InferenceEngine::Precision& netPrecission = InferenceEngine::Precision::FP32,
|
||||
std::vector<float>* hidden_memory_init_out = nullptr,
|
||||
std::vector<float>* cell_memory_init_out = nullptr);
|
||||
@ -43,10 +45,11 @@ protected:
|
||||
|
||||
size_t hidden_size;
|
||||
size_t third_dim;
|
||||
std::pair<float, float> weights_range;
|
||||
std::vector<float> hidden_memory_init;
|
||||
std::vector<float> cell_memory_init;
|
||||
void SetUp() override;
|
||||
std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>> CalculateRefs() override;
|
||||
InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const override;
|
||||
};
|
||||
|
||||
} // namespace SubgraphTestsDefinitions
|
||||
|
@ -17,7 +17,8 @@ std::string Basic_LSTM_S::getTestCaseName(const testing::TestParamInfo<basicLstm
|
||||
std::pair<size_t, size_t> size_params;
|
||||
size_t num_cells;
|
||||
bool decompose;
|
||||
std::tie(netPrecision, targetDevice, configuration, size_params, num_cells, decompose) = obj.param;
|
||||
std::pair<float, float> weights;
|
||||
std::tie(netPrecision, targetDevice, configuration, size_params, num_cells, decompose, weights) = obj.param;
|
||||
|
||||
std::ostringstream result;
|
||||
result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
|
||||
@ -39,12 +40,12 @@ void Basic_LSTM_S::SetUp() {
|
||||
std::pair<size_t, size_t> size_params;
|
||||
size_t num_cells;
|
||||
bool decompose;
|
||||
std::tie(netPrecision, targetDevice, configuration, size_params, num_cells, decompose) = this->GetParam();
|
||||
std::tie(netPrecision, targetDevice, configuration, size_params, num_cells, decompose, weights_range) = this->GetParam();
|
||||
third_dim = size_params.first;
|
||||
hidden_size = size_params.second;
|
||||
outPrc = InferenceEngine::Precision::FP32;
|
||||
|
||||
function = GetNetwork(size_params.first, size_params.second, num_cells, netPrecision, &hidden_memory_init, &cell_memory_init);
|
||||
function = GetNetwork(size_params.first, size_params.second, num_cells, weights_range, netPrecision, &hidden_memory_init, &cell_memory_init);
|
||||
if (decompose) {
|
||||
ngraph::pass::Manager manager;
|
||||
manager.register_pass<ngraph::pass::LSTMCellDecomposition>();
|
||||
@ -55,6 +56,7 @@ void Basic_LSTM_S::SetUp() {
|
||||
std::shared_ptr<ngraph::Function> Basic_LSTM_S::GetNetwork(size_t thirdDimOut,
|
||||
size_t hiddenSize,
|
||||
size_t num_cells,
|
||||
std::pair<float, float> weights_range,
|
||||
const InferenceEngine::Precision& netPrecission,
|
||||
std::vector<float>* hidden_memory_init_out,
|
||||
std::vector<float>* cell_memory_init_out) {
|
||||
@ -70,8 +72,8 @@ std::shared_ptr<ngraph::Function> Basic_LSTM_S::GetNetwork(size_t thirdDimOut,
|
||||
auto reshape1 = std::make_shared<ngraph::opset1::Reshape>(params[0], pattern1, false);
|
||||
|
||||
auto reshape1_shape = reshape1->output(0).get_shape();
|
||||
auto H_init = ngraph::builder::makeConstant<float>(ngPrc, { batch_size, hiddenSize }, {}, true);
|
||||
auto C_init = ngraph::builder::makeConstant<float>(ngPrc, { batch_size, hiddenSize }, {}, true);
|
||||
auto H_init = ngraph::builder::makeConstant<float>(ngPrc, { batch_size, hiddenSize }, {}, true, weights_range.second, weights_range.first);
|
||||
auto C_init = ngraph::builder::makeConstant<float>(ngPrc, { batch_size, hiddenSize }, {}, true, weights_range.second, weights_range.first);
|
||||
if (hidden_memory_init_out != nullptr) {
|
||||
*hidden_memory_init_out = std::static_pointer_cast<ngraph::opset1::Constant>(H_init)->cast_vector<float>();
|
||||
}
|
||||
@ -84,8 +86,9 @@ std::shared_ptr<ngraph::Function> Basic_LSTM_S::GetNetwork(size_t thirdDimOut,
|
||||
C_t->set_friendly_name("cell_state_1");
|
||||
//Body
|
||||
auto X = std::make_shared<ngraph::opset1::Parameter>(ngPrc, ngraph::Shape{ batch_size, 1, reshape1_shape[2] });
|
||||
auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, reshape1_shape[2] }, {}, true);
|
||||
auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, {}, true);
|
||||
auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, reshape1_shape[2] }, {}, true, weights_range.second, weights_range.first);
|
||||
auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, {}, true, weights_range.second,
|
||||
weights_range.first);
|
||||
|
||||
//lstm [1, 10], [1, 118], [1, 118] -> [1, 118], [1, 118]
|
||||
outFormShapes1 = { batch_size, reshape1_shape[2] };
|
||||
@ -112,7 +115,7 @@ std::shared_ptr<ngraph::Function> Basic_LSTM_S::GetNetwork(size_t thirdDimOut,
|
||||
auto out0 = tensor_iterator->get_iter_value(H_o, -1);
|
||||
|
||||
const size_t output_size = 12;
|
||||
auto fc1 = ngraph::builder::makeFullyConnected(out0, ngPrc, output_size, true, { hiddenSize, output_size }, { 1 }, { 1 });
|
||||
auto fc1 = ngraph::builder::makeFullyConnected(out0, ngPrc, output_size, true, { hiddenSize, output_size }, { weights_range.second }, { 0.f });
|
||||
|
||||
ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(fc1) };
|
||||
return std::make_shared<ngraph::Function>(results, params, "Basic_LSTM_S");
|
||||
@ -146,60 +149,11 @@ void Basic_LSTM_S::Run() {
|
||||
|
||||
const auto& actualOutputs = GetOutputs();
|
||||
auto referenceOutputs = CalculateRefs();
|
||||
|
||||
Compare(referenceOutputs, actualOutputs);
|
||||
}
|
||||
|
||||
std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>> Basic_LSTM_S::CalculateRefs() {
|
||||
//For now TensorIterator is not implemented in ngraph interpreter so it is needed to validate with another reference
|
||||
auto reference_model = ngraph::clone_function(*function);
|
||||
ngraph::pass::Manager manager;
|
||||
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
|
||||
manager.run_passes(reference_model);
|
||||
|
||||
auto refCnnNetwork = InferenceEngine::CNNNetwork{ reference_model };
|
||||
auto refExecutableNetwork = core->LoadNetwork(refCnnNetwork, targetDevice);
|
||||
|
||||
auto refInferRequest = refExecutableNetwork.CreateInferRequest();
|
||||
std::vector<InferenceEngine::InputInfo::Ptr> refInfos;
|
||||
for (const auto& input : refCnnNetwork.getInputsInfo()) {
|
||||
const auto& info = input.second;
|
||||
refInfos.push_back(info);
|
||||
}
|
||||
|
||||
for (std::size_t i = 0; i < inputs.size(); ++i) {
|
||||
const auto& input = inputs[i];
|
||||
const auto& info = refInfos[i];
|
||||
|
||||
refInferRequest.SetBlob(info->name(), input);
|
||||
}
|
||||
|
||||
refInferRequest.Infer();
|
||||
|
||||
auto refOutputs = std::vector<InferenceEngine::Blob::Ptr>{};
|
||||
for (const auto& output : refCnnNetwork.getOutputsInfo()) {
|
||||
const auto& name = output.first;
|
||||
refOutputs.push_back(refInferRequest.GetBlob(name));
|
||||
}
|
||||
|
||||
auto referenceOutputs = std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>>(refOutputs.size());
|
||||
for (std::size_t i = 0; i < refOutputs.size(); ++i) {
|
||||
const auto& reference = refOutputs[i];
|
||||
const auto refSize = reference->byteSize();
|
||||
|
||||
referenceOutputs[i].first = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(reference->getTensorDesc().getPrecision());
|
||||
auto& expectedOutput = referenceOutputs[i].second;
|
||||
expectedOutput.resize(refSize);
|
||||
|
||||
auto refMemory = InferenceEngine::as<InferenceEngine::MemoryBlob>(reference);
|
||||
IE_ASSERT(refMemory);
|
||||
const auto refLockedMemory = refMemory->wmap();
|
||||
const auto referenceBuffer = refLockedMemory.as<const std::uint8_t*>();
|
||||
|
||||
std::copy(referenceBuffer, referenceBuffer + refSize, expectedOutput.data());
|
||||
}
|
||||
|
||||
return referenceOutputs;
|
||||
InferenceEngine::Blob::Ptr Basic_LSTM_S::GenerateInput(const InferenceEngine::InputInfo& info) const {
|
||||
return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), weights_range.second, weights_range.first, 1);
|
||||
}
|
||||
|
||||
} // namespace SubgraphTestsDefinitions
|
||||
|
Loading…
Reference in New Issue
Block a user