add snippets for automatic batching (#10910)

* add snippets for automatic batching

* Update docs/snippets/ov_auto_batching.py

Co-authored-by: Alexey Lebedev <alexey.lebedev@intel.com>

* add missing bracket

Co-authored-by: Alexey Lebedev <alexey.lebedev@intel.com>
This commit is contained in:
Bartek Szmelczynski
2022-03-15 12:17:20 +01:00
committed by GitHub
parent 5f27c74d96
commit 840e622da5
2 changed files with 29 additions and 37 deletions

View File

@@ -76,7 +76,7 @@ For example, the application processes only 4 video streams, so there is no need
.. doxygensnippet:: docs/snippets/ov_auto_batching.py
:language: python
:fragment: hint_num_requests]
:fragment: [hint_num_requests]
@endsphinxdirective

View File

@@ -1,41 +1,33 @@
#include <openvino/runtime/core.hpp>
from openvino.runtime import Core
int main() {
ov::Core core;
auto model = core.read_model("sample.xml");
core = Core()
model = core.read_model(model="sample.xml")
//! [compile_model]
{
auto compiled_model = core.compile_model(model, "GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT));
}
//! [compile_model]
# [compile_model]
config = {"PERFORMANCE_HINT": "THROUGHPUT"}
compiled_model = core.compile_model(model, "GPU", config)
# [compile_model]
//! [compile_model_no_auto_batching]
{
// disabling the automatic batching
// leaving intact other configuration options that the device selects for the 'throughput' hint
auto compiled_model = core.compile_model(model, "GPU", {ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT),
ov::hint::allow_auto_batching(false)});
}
//! [compile_model_no_auto_batching]
# [compile_model_no_auto_batching]
# disabling the automatic batching
# leaving intact other configuration options that the device selects for the 'throughput' hint
config = {"PERFORMANCE_HINT": "THROUGHPUT",
"ALLOW_AUTO_BATCHING": False}
compiled_model = core.compile_model(model, "GPU", config)
# [compile_model_no_auto_batching]
//! [query_optimal_num_requests]
{
// when the batch size is automatically selected by the implementation
// it is important to query/create and run the sufficient #requests
auto compiled_model = core.compile_model(model, "GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT));
auto num_requests = compiled_model.get_property(ov::optimal_number_of_infer_requests);
}
//! [query_optimal_num_requests]
# [query_optimal_num_requests]
# when the batch size is automatically selected by the implementation
# it is important to query/create and run a sufficient number of requests
config = {"PERFORMANCE_HINT": "THROUGHPUT"}
compiled_model = core.compile_model(model, "GPU", config)
num_requests = compiled_model.get_property("OPTIMAL_NUMBER_OF_INFER_REQUESTS")
# [query_optimal_num_requests]
//! [hint_num_requests]
{
// limiting the available parallel slack for the 'throughput' hint via the ov::hint::num_requests
// so that certain parameters (like selected batch size) are automatically accommodated accordingly
auto compiled_model = core.compile_model(model, "GPU", {ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT),
ov::hint::num_requests(4)});
}
//! [hint_num_requests]
return 0;
}
# [hint_num_requests]
config = {"PERFORMANCE_HINT": "THROUGHPUT",
"PERFORMANCE_HINT_NUM_REQUESTS": "4"}
# limiting the available parallel slack for the 'throughput' hint
# so that certain parameters (like selected batch size) are automatically accommodated accordingly
compiled_model = core.compile_model(model, "GPU", config)
# [hint_num_requests]