add snippets for automatic batching (#10910)
* add snippets for automatic batching
* Update docs/snippets/ov_auto_batching.py
  Co-authored-by: Alexey Lebedev <alexey.lebedev@intel.com>
* add missing bracket
Co-authored-by: Alexey Lebedev <alexey.lebedev@intel.com>
This commit is contained in:
committed by
GitHub
parent
5f27c74d96
commit
840e622da5
@@ -76,7 +76,7 @@ For example, the application processes only 4 video streams, so there is no need
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_auto_batching.py
|
||||
:language: python
|
||||
:fragment: hint_num_requests]
|
||||
:fragment: [hint_num_requests]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
|
||||
@@ -1,41 +1,33 @@
|
||||
#include <openvino/runtime/core.hpp>
|
||||
from openvino.runtime import Core
|
||||
|
||||
int main() {
|
||||
ov::Core core;
|
||||
auto model = core.read_model("sample.xml");
|
||||
core = Core()
|
||||
model = core.read_model(model="sample.xml")
|
||||
|
||||
//! [compile_model]
|
||||
{
|
||||
auto compiled_model = core.compile_model(model, "GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT));
|
||||
}
|
||||
//! [compile_model]
|
||||
# [compile_model]
|
||||
config = {"PERFORMANCE_HINT": "THROUGHPUT"}
|
||||
compiled_model = core.compile_model(model, "GPU", config)
|
||||
# [compile_model]
|
||||
|
||||
//! [compile_model_no_auto_batching]
|
||||
{
|
||||
// disabling the automatic batching
|
||||
// leaving intact other configurations options that the device selects for the 'throughput' hint
|
||||
auto compiled_model = core.compile_model(model, "GPU", {ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT),
|
||||
ov::hint::allow_auto_batching(false)});
|
||||
}
|
||||
//! [compile_model_no_auto_batching]
|
||||
# [compile_model_no_auto_batching]
|
||||
# disabling the automatic batching
|
||||
# leaving intact the other configuration options that the device selects for the 'throughput' hint
|
||||
config = {"PERFORMANCE_HINT": "THROUGHPUT",
|
||||
"ALLOW_AUTO_BATCHING": False}
|
||||
compiled_model = core.compile_model(model, "GPU", config)
|
||||
# [compile_model_no_auto_batching]
|
||||
|
||||
//! [query_optimal_num_requests]
|
||||
{
|
||||
// when the batch size is automatically selected by the implementation
|
||||
// it is important to query/create and run the sufficient #requests
|
||||
auto compiled_model = core.compile_model(model, "GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT));
|
||||
auto num_requests = compiled_model.get_property(ov::optimal_number_of_infer_requests);
|
||||
}
|
||||
//! [query_optimal_num_requests]
|
||||
# [query_optimal_num_requests]
|
||||
# when the batch size is automatically selected by the implementation
|
||||
# it is important to query, create, and run a sufficient number of requests
|
||||
config = {"PERFORMANCE_HINT": "THROUGHPUT"}
|
||||
compiled_model = core.compile_model(model, "GPU", config)
|
||||
num_requests = compiled_model.get_property("OPTIMAL_NUMBER_OF_INFER_REQUESTS")
|
||||
# [query_optimal_num_requests]
|
||||
|
||||
//! [hint_num_requests]
|
||||
{
|
||||
// limiting the available parallel slack for the 'throughput' hint via the ov::hint::num_requests
|
||||
// so that certain parameters (like selected batch size) are automatically accommodated accordingly
|
||||
auto compiled_model = core.compile_model(model, "GPU", {ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT),
|
||||
ov::hint::num_requests(4)});
|
||||
}
|
||||
//! [hint_num_requests]
|
||||
|
||||
return 0;
|
||||
}
|
||||
# [hint_num_requests]
|
||||
config = {"PERFORMANCE_HINT": "THROUGHPUT",
|
||||
"PERFORMANCE_HINT_NUM_REQUESTS": "4"}
|
||||
# limiting the available parallel slack for the 'throughput' hint via PERFORMANCE_HINT_NUM_REQUESTS
|
||||
# so that certain parameters (like selected batch size) are automatically accommodated accordingly
|
||||
compiled_model = core.compile_model(model, "GPU", config)
|
||||
# [hint_num_requests]
|
||||
|
||||
Reference in New Issue
Block a user