From 4feaeaad687ecd86cb4ff0079e1c406313b70976 Mon Sep 17 00:00:00 2001
From: Vladimir Paramuzov
Date: Wed, 11 Jan 2023 15:14:03 +0400
Subject: [PATCH] [GPU] Remote context reuse and internal config update (#14635)

---
 .../include/intel_gpu/graph/build_options.hpp | 488 --------
 .../include/intel_gpu/graph/network.hpp | 20 +-
 .../include/intel_gpu/graph/program.hpp | 23 +-
 .../intel_gpu/plugin/compiled_model.hpp | 10 +-
 .../intel_gpu/plugin/device_config.hpp | 105 --
 .../include/intel_gpu/plugin/graph.hpp | 23 +-
 .../intel_gpu/plugin/infer_request.hpp | 3 +-
 .../intel_gpu/plugin/infer_request_legacy.hpp | 1 +
 .../intel_gpu/plugin/internal_properties.hpp | 23 -
 .../intel_gpu/plugin/legacy_api_helper.hpp | 23 +
 .../include/intel_gpu/plugin/plugin.hpp | 63 +-
 .../include/intel_gpu/plugin/program.hpp | 25 +-
 .../intel_gpu/plugin/remote_allocators.hpp | 99 ++
 .../include/intel_gpu/plugin/remote_blob.hpp | 171 +++
 .../intel_gpu/plugin/remote_context.hpp | 621 ++--------
 .../plugin/transformations_pipeline.hpp | 7 +-
 .../intel_gpu/plugin/variable_state.hpp | 4 +-
 .../primitives/implementation_desc.hpp | 53 +-
 .../include/intel_gpu/runtime/engine.hpp | 36 +-
 .../runtime/engine_configuration.hpp | 96 +-
 .../intel_gpu/runtime/execution_config.hpp | 162 +++
 .../intel_gpu/runtime/internal_properties.hpp | 99 ++
 .../include/intel_gpu/runtime/lru_cache.hpp | 2 +
 .../include/intel_gpu/runtime/stream.hpp | 11 +-
 .../src/graph/compilation_context.cpp | 8 +-
 .../graph_optimizer/add_required_reorders.cpp | 2 +-
 .../graph/graph_optimizer/compile_graph.cpp | 2 +-
 .../graph_optimizer/graph_initializations.cpp | 6 +-
 .../graph_optimizer/pre_replace_deconv.cpp | 2 +-
 .../graph_optimizer/prepare_buffer_fusing.cpp | 12 +-
 .../prepare_primitive_fusing.cpp | 7 +-
 .../graph_optimizer/propagate_constants.cpp | 13 +-
 .../select_preferred_formats.cpp | 1 +
 .../graph/impls/ocl/binary_convolution.cpp | 6 +-
 .../src/graph/impls/ocl/convolution.cpp | 6 +-
 .../impls/onednn/concatenation_onednn.cpp | 3 +-
 .../graph/impls/onednn/convolution_onednn.cpp | 3 +-
 .../impls/onednn/deconvolution_onednn.cpp | 3 +-
 .../impls/onednn/fully_connected_onednn.cpp | 3 +-
 .../src/graph/impls/onednn/gemm_onednn.cpp | 3 +-
 .../src/graph/impls/onednn/pooling_onednn.cpp | 3 +-
 .../impls/onednn/primitive_onednn_base.h | 22 +-
 .../graph/impls/onednn/reduction_onednn.cpp | 3 +-
 .../src/graph/impls/onednn/reorder_onednn.cpp | 3 +-
 .../src/graph/impls/onednn/utils.cpp | 4 +-
 .../src/graph/include/compilation_context.hpp | 2 +-
 .../src/graph/include/condition_inst.h | 2 +-
 .../graph/include/kernel_selector_helper.h | 2 +-
 .../src/graph/include/layout_optimizer.h | 2 +-
 .../intel_gpu/src/graph/include/loop_inst.h | 6 +-
 .../src/graph/include/pass_manager.h | 4 +-
 .../src/graph/include/program_dump_graph.h | 3 +-
 .../intel_gpu/src/graph/kernel_runner.cpp | 4 +-
 .../src/graph/kernel_selector_helper.cpp | 25 +-
 .../intel_gpu/src/graph/layout_optimizer.cpp | 6 +-
 src/plugins/intel_gpu/src/graph/network.cpp | 61 +-
 .../intel_gpu/src/graph/pass_manager.cpp | 2 +-
 .../intel_gpu/src/graph/primitive_inst.cpp | 31 +-
 src/plugins/intel_gpu/src/graph/program.cpp | 140 ++-
 .../src/graph/program_dump_graph.cpp | 13 +-
 .../intel_gpu/src/graph/program_node.cpp | 12 +-
 .../intel_gpu/src/plugin/compiled_model.cpp | 131 +--
 .../intel_gpu/src/plugin/device_config.cpp | 499 --------
 src/plugins/intel_gpu/src/plugin/graph.cpp | 30 +-
 .../intel_gpu/src/plugin/infer_request.cpp | 67 +-
 .../src/plugin/infer_request_legacy.cpp | 40 +-
 .../src/plugin/legacy_api_helper.cpp | 272
+++++ .../src/plugin/ops/adaptive_pooling.cpp | 2 +- .../intel_gpu/src/plugin/ops/constant.cpp | 4 +- .../intel_gpu/src/plugin/ops/convolution.cpp | 2 +- .../src/plugin/ops/ctc_greedy_decoder.cpp | 2 +- ...xperimental_detectron_detection_output.cpp | 4 +- ...ectron_generate_proposals_single_image.cpp | 2 +- ...mental_detectron_roi_feature_extractor.cpp | 2 +- .../src/plugin/ops/generate_proposals.cpp | 4 +- src/plugins/intel_gpu/src/plugin/ops/loop.cpp | 8 +- .../intel_gpu/src/plugin/ops/matrix_nms.cpp | 4 +- .../src/plugin/ops/multiclass_nms.cpp | 4 +- .../src/plugin/ops/non_max_suppression.cpp | 4 +- .../intel_gpu/src/plugin/ops/normalize_l2.cpp | 4 +- .../intel_gpu/src/plugin/ops/parameter.cpp | 6 +- .../intel_gpu/src/plugin/ops/pooling.cpp | 2 +- .../intel_gpu/src/plugin/ops/proposal.cpp | 2 +- .../src/plugin/ops/tensor_iterator.cpp | 8 +- src/plugins/intel_gpu/src/plugin/ops/topk.cpp | 2 +- src/plugins/intel_gpu/src/plugin/plugin.cpp | 1000 +++++++---------- src/plugins/intel_gpu/src/plugin/program.cpp | 73 +- .../src/plugin/remote_allocators.cpp | 68 ++ .../intel_gpu/src/plugin/remote_blob.cpp | 285 +++++ .../intel_gpu/src/plugin/remote_context.cpp | 533 ++++----- .../src/plugin/transformations_pipeline.cpp | 25 +- .../intel_gpu/src/plugin/variable_state.cpp | 23 +- src/plugins/intel_gpu/src/runtime/engine.cpp | 27 +- .../src/runtime/execution_config.cpp | 196 ++++ .../intel_gpu/src/runtime/kernels_cache.cpp | 73 +- .../intel_gpu/src/runtime/kernels_cache.hpp | 9 +- .../ocl/ocl_command_queues_builder.cpp | 34 +- .../ocl/ocl_command_queues_builder.hpp | 9 +- .../intel_gpu/src/runtime/ocl/ocl_device.cpp | 2 +- .../intel_gpu/src/runtime/ocl/ocl_engine.cpp | 45 +- .../intel_gpu/src/runtime/ocl/ocl_engine.hpp | 19 +- .../src/runtime/ocl/ocl_engine_factory.hpp | 3 +- .../intel_gpu/src/runtime/ocl/ocl_stream.cpp | 62 +- .../intel_gpu/src/runtime/ocl/ocl_stream.hpp | 10 +- src/plugins/intel_gpu/src/runtime/stream.cpp | 2 +- .../tests/fusions/activation_fusion_test.cpp | 12 +- .../fusions/batch_to_space_fusion_test.cpp | 4 +- .../binary_convolution_fusion_test.cpp | 4 +- .../tests/fusions/concatenate_fusion_test.cpp | 22 +- .../tests/fusions/convolution_fusion_test.cpp | 215 ++-- .../fusions/deconvolution_fusion_test.cpp | 8 +- .../fusions/depth_to_space_fusion_test.cpp | 4 +- .../tests/fusions/eltwise_fusion_test.cpp | 36 +- .../fusions/fully_connected_fusion_test.cpp | 20 +- .../tests/fusions/fusion_test_common.hpp | 15 +- .../tests/fusions/fusion_validity_test.cpp | 30 +- .../fusions/gather_elements_fusion_test.cpp | 4 +- .../tests/fusions/gather_fusion_test.cpp | 4 +- .../tests/fusions/gather_nd_fusion_test.cpp | 4 +- .../tests/fusions/gemm_fusion_test.cpp | 16 +- .../tests/fusions/loop_fusion_test.cpp | 4 +- .../tests/fusions/lrn_fusion_test.cpp | 12 +- .../tests/fusions/mvn_fusion_test.cpp | 4 +- .../tests/fusions/normalize_fusion_test.cpp | 4 +- .../tests/fusions/permute_fusion_test.cpp | 8 +- .../tests/fusions/pooling_fusion_test.cpp | 33 +- .../tests/fusions/reduce_fusion_test.cpp | 4 +- .../tests/fusions/resample_fusion_test.cpp | 4 +- .../scatter_elements_update_fusion_test.cpp | 4 +- .../fusions/scatter_nd_update_fusion_test.cpp | 4 +- .../fusions/scatter_update_fusion_test.cpp | 4 +- .../tests/fusions/softmax_fusion_test.cpp | 4 +- .../fusions/space_to_batch_fusion_test.cpp | 4 +- .../fusions/space_to_depth_fusion_test.cpp | 4 +- .../graph_manipulation_gpu_test.cpp | 14 +- .../tests/module_tests/usm_memory_test.cpp | 4 +- .../passes/prepare_buffer_fusing_test.cpp | 12 +- 
.../passes/prepare_primitive_fusing_test.cpp | 60 +- .../tests/passes/reorder_inputs_test.cpp | 36 +- .../passes/select_preferred_formats_test.cpp | 12 +- .../passes/test_module_fusing_reorder.cpp | 47 +- .../tests/shape_infer/broadcast_si_test.cpp | 9 +- .../test_cases/activation_simple_gpu_test.cpp | 21 +- .../test_cases/add_reorders_gpu_test.cpp | 6 +- .../tests/test_cases/arg_max_gpu_test.cpp | 8 +- .../tests/test_cases/barriers_test.cpp | 10 +- .../binary_convolution_gpu_test.cpp | 20 +- .../tests/test_cases/broadcast_gpu_test.cpp | 6 +- .../intel_gpu/tests/test_cases/cache_test.cpp | 26 +- .../tests/test_cases/cl_mem_input_test.cpp | 4 +- .../tests/test_cases/command_queue_test.cpp | 62 +- .../test_cases/concatenation_gpu_test.cpp | 115 +- .../tests/test_cases/condition_gpu_test.cpp | 54 +- .../tests/test_cases/convolution_gpu_test.cpp | 361 +++--- .../tests/test_cases/crop_gpu_test.cpp | 86 +- .../tests/test_cases/cum_sum_gpu_test.cpp | 6 +- .../test_cases/deconvolution_gpu_test.cpp | 246 ++-- .../test_cases/depth_concatenate_gpu_test.cpp | 96 +- .../test_cases/depth_to_space_gpu_test.cpp | 6 +- .../test_cases/detection_output_test.cpp | 54 +- .../tests/test_cases/eltwise_gpu_test.cpp | 54 +- .../test_cases/fully_connected_gpu_test.cpp | 84 +- .../tests/test_cases/gather_gpu_test.cpp | 6 +- .../tests/test_cases/gemm_gpu_test.cpp | 31 +- .../test_cases/lstm_dynamic_gpu_test.cpp | 12 +- .../tests/test_cases/memory_test.cpp | 96 +- .../test_cases/multiclass_nms_gpu_test.cpp | 6 +- .../test_cases/multiple_streams_gpu_test.cpp | 12 +- .../tests/test_cases/mvn_gpu_test.cpp | 22 +- .../test_cases/non_max_suppression_test.cpp | 44 +- .../tests/test_cases/permute_gpu_test.cpp | 36 +- .../tests/test_cases/pooling_gpu_test.cpp | 73 +- .../tests/test_cases/prior_box_gpu_test.cpp | 6 +- .../propagate_constants_gpu_test.cpp | 6 +- .../tests/test_cases/quantize_gpu_test.cpp | 15 +- .../test_cases/random_uniform_gpu_test.cpp | 6 +- .../tests/test_cases/range_gpu_test.cpp | 11 +- .../tests/test_cases/reduce_gpu_test.cpp | 33 +- .../test_cases/removing_output_node_test.cpp | 6 +- .../tests/test_cases/reorder_gpu_test.cpp | 178 ++- .../tests/test_cases/resample_gpu_test.cpp | 144 ++- .../tests/test_cases/reshape_gpu_test.cpp | 54 +- .../test_cases/set_output_memory_gpu_test.cpp | 8 +- .../tests/test_cases/shape_of_gpu_test.cpp | 6 +- .../tests/test_cases/softmax_gpu_test.cpp | 10 +- .../tests/test_cases/split_gpu_test.cpp | 12 +- .../tests/test_cases/streams_test.cpp | 6 +- .../test_cases/strided_slice_gpu_test.cpp | 12 +- .../test_device_mem_usage_estimation.cpp | 9 +- .../test_cases/trim_to_outputs_gpu_test.cpp | 24 +- .../intel_gpu/tests/test_cases/variable.cpp | 6 +- .../intel_gpu/tests/test_utils/network_test.h | 18 +- .../intel_gpu/tests/test_utils/test_utils.cpp | 49 +- .../intel_gpu/tests/test_utils/test_utils.h | 15 +- .../gpu/concurrency/gpu_concurrency_tests.cpp | 2 +- .../dx11_remote_ctx_test.cpp | 61 +- .../gpu_remote_tensor_tests.cpp | 95 +- .../behavior/ov_plugin/core_integration.cpp | 8 +- 198 files changed, 4477 insertions(+), 4876 deletions(-) delete mode 100644 src/plugins/intel_gpu/include/intel_gpu/graph/build_options.hpp delete mode 100644 src/plugins/intel_gpu/include/intel_gpu/plugin/device_config.hpp delete mode 100644 src/plugins/intel_gpu/include/intel_gpu/plugin/internal_properties.hpp create mode 100644 src/plugins/intel_gpu/include/intel_gpu/plugin/legacy_api_helper.hpp create mode 100644 src/plugins/intel_gpu/include/intel_gpu/plugin/remote_allocators.hpp create mode 
100644 src/plugins/intel_gpu/include/intel_gpu/plugin/remote_blob.hpp create mode 100644 src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp create mode 100644 src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp delete mode 100644 src/plugins/intel_gpu/src/plugin/device_config.cpp create mode 100644 src/plugins/intel_gpu/src/plugin/legacy_api_helper.cpp create mode 100644 src/plugins/intel_gpu/src/plugin/remote_allocators.cpp create mode 100644 src/plugins/intel_gpu/src/plugin/remote_blob.cpp create mode 100644 src/plugins/intel_gpu/src/runtime/execution_config.cpp diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/build_options.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/build_options.hpp deleted file mode 100644 index 0ba9fba6e8b..00000000000 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/build_options.hpp +++ /dev/null @@ -1,488 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "intel_gpu/runtime/engine.hpp" -#include "intel_gpu/primitives/implementation_desc.hpp" - -#include "topology.hpp" - -#include -#include -#include -#include -#include - -namespace cldnn { - -/// @addtogroup cpp_api C++ API -/// @{ - -/// @defgroup cpp_program Program compilation -/// @{ - -/// @brief Represents user-provided program build option type. -enum class build_option_type { - /// @brief Allow primitives fusing during program build (default: false). - fusing, - - /// @brief Enable implicit reordering for user inputs (default: false). - optimize_data, - - /// @brief Enable implicit static input reordering for user inputs (default: false). - allow_static_input_reorder, - - /// @brief Enable debug mode (default: false). - /// @details This option enforce all program primitives to be accessible as outputs. - debug, - - /// @brief User selected list of program outputs. - outputs, - - /// @brief User defined learning parameters. - learning_config, - - /// @brief Tuning config (default: Tuning is disabled). - /// @details The tuner will automatically find the optimal kernel/config for each node in the graph, - /// by running multiple implementations and configurations per node and storing the optimal one in cache. - /// Expect long execution time in the first run. - /// After the first run a cache with the tuning results will be created in the path provided. - /// This cache will be used in the next runs. - tuning_config, - - /// @brief Specifies a directory to which stages of network compilation should be dumped. (default: empty, i.e. no dumping) - graph_dumps_dir, - /// @brief Specifies a directory to which compiled kernels should be cached or can be loaded from. (default: empty, i.e. no caching) - kernels_cache_dir, - /// @brief Name for serialization process - serialize_network, - load_program, - force_implementations, - partial_build_program, - allow_new_shape_infer -}; - -/// @brief Tuning mode. -enum class tuning_mode { - /// @brief Tuning is disabled. - tuning_disabled, - - /// @brief Tuning using the cached data (no on-line tuning for non-existing data). - tuning_use_cache, - - /// @brief Tuning using the cached data if exist, tune and update cache otherwise. - tuning_tune_and_cache, - - /// @brief Tuning using the cached data and update tasks. - /// @details Performs updating tasks like removal of invalid caches, promoting to new format, etc. - /// No tuning for non-existing data. - tuning_use_and_update, - - /// @brief Retune the cache data even if it exists. 
- tuning_retune_and_cache -}; - -/// @brief Tuning configuration. -struct tuning_config_options { - tuning_mode mode; - std::string cache_file_path; - - tuning_config_options() : mode(tuning_mode::tuning_disabled), cache_file_path("") {} -}; - -/// @brief Learning parameters. -struct learning_params { - float momentum = 0.0; - float weights_decay = 0.0; - - learning_params() : momentum(0.9f), weights_decay(0.0005f) {} -}; - -/// @brief Represents user-provided program build option. -struct build_option { - /// @brief Allow primitives fusing during program build (default: false). - static std::shared_ptr fusing(bool enable = false); - - /// @brief Enable implicit reordering for user inputs (default: false). - static std::shared_ptr optimize_data(bool enable = false); - - /// @brief Enable implicit reordering for static user inputs (default: false). - static std::shared_ptr allow_static_input_reorder(bool enable = false); - - /// @brief Enable debug mode (default: false). - /// @details This option enforce all program primitives to be accessible as outputs. - static std::shared_ptr debug(bool enable = false); - - /// @brief User selected list of program outputs. - static std::shared_ptr outputs(const std::vector& outs); - - /// @brief Tuning configuration (default: false). - /// @details This option will automatically find the optimal kernel/config for each node in the graph, - /// by running multiple implementations and configurations per node and storing the optimal one in cache. - /// Expect long execution time in the first run (unless the cache only mode is enabled). - /// After the first run a cache with the tuning results will be created in the path provided. - /// This cache will be used in the next runs. - static std::shared_ptr tuning_config( - const tuning_config_options& config = tuning_config_options()); - - /// @brief Specifies a directory to which stages of network compilation should be dumped (default: empty, i.e. no dumping) - static std::shared_ptr graph_dumps_dir(const std::string& dir_path); - - /// @brief Specifies a directory to which compiled kernels should be cached or can be loaded from. (default: empty, i.e. no caching) - static std::shared_ptr kernels_cache_dir(const std::string& dir_path); - - /// @brief Specifies a name for serialization process. - static std::shared_ptr serialize_network(const std::string& network_name); - /// @brief Specifies a name of load_program process. - static std::shared_ptr load_program(const std::string& network_name); - - /// @brief User defined learning parameters. - static std::shared_ptr learning_config(const learning_params& params = learning_params()); - /// @brief Specifies user defined implementation details to use. - static std::shared_ptr force_implementations(implementation_forcing_map forcing); - - static std::shared_ptr partial_build_program(bool set = false); - - static std::shared_ptr allow_new_shape_infer(bool set = false); - - virtual ~build_option() = default; - -private: - /// @brief Returns option type represented by this object. - virtual build_option_type get_type() const = 0; - - friend class build_options; -}; - -/// @brief @ref build_option specialization for boolean options. -template -struct build_option_bool : build_option { - /// @brief Constructs option. - /// @param value Is option enabled. - explicit build_option_bool(bool value) : _value(value ? 1 : 0) {} - - /// @brief Is option enabled. 
- bool enabled() const { return _value != 0; } - -private: - build_option_type get_type() const override { return OptType; } - uintptr_t _value; -}; - -/// @brief @ref build_option specialization for program outputs list. -struct build_option_outputs : build_option { - /// @brief The list of output ids (names) - const std::vector outputs; - - /// @brief Constructs option. - /// @param outs List of ouput ids (names) - explicit build_option_outputs(const std::vector& outs) - : outputs(outs) {} - -private: - /// @brief Returns build_option_type::outputs. - build_option_type get_type() const override { return build_option_type::outputs; } - - build_option_outputs(const build_option_outputs& other) = delete; - build_option_outputs& operator=(const build_option_outputs& other) = delete; -}; - -/// @brief @ref build_option specialization for learning config. -struct build_option_learning_config : build_option { - /// @brief Learning parameters. - const learning_params params; - - /// @brief Constructs learning config build option. - /// @param learning_params Parameters for learning. - explicit build_option_learning_config(const learning_params& params) - : params(params) {} - -private: - /// @brief Returns build_option_type::learning_config. - build_option_type get_type() const override { return build_option_type::learning_config; } - - build_option_learning_config(const build_option_learning_config& other) = delete; - build_option_learning_config& operator=(const build_option_learning_config& other) = delete; -}; - -/// @brief @ref build_option specialization for tuning config. -struct build_option_tuning_config : build_option { - /// @brief Tuning configuration - const tuning_config_options config; - - /// @brief Constructs tuning config build option. - /// @param tuning_config Configuration for the tuning. - explicit build_option_tuning_config(const tuning_config_options& tuning_config) - : config(tuning_config) {} - -private: - /// @brief Returns build_option_type::tuning_config. - build_option_type get_type() const override { return build_option_type::tuning_config; } - - build_option_tuning_config(const build_option_tuning_config& other) = delete; - build_option_tuning_config& operator=(const build_option_tuning_config& other) = delete; -}; - -/// @brief @ref build_option specialization for selecting a directory. -template -struct build_option_directory : build_option { - const std::string directory_path; - - /// @brief Constructs option. - /// @param outs List of ouput ids (names) - explicit build_option_directory(const std::string& dir_path) : directory_path(dir_path) {} - -private: - /// @brief Returns build_option_type::graph_dumps_dir. - build_option_type get_type() const override { return build_option_type::graph_dumps_dir; } - - build_option_directory(const build_option_directory& other) = delete; - build_option_directory& operator=(const build_option_directory& other) = delete; -}; - -/// @brief @ref build_option specialization for selecting a directory. -template -struct build_option_kernels_cache_dir : build_option { - const std::string directory_path; - - explicit build_option_kernels_cache_dir(const std::string& dir_path) : directory_path(dir_path) {} - -private: - /// @brief Returns build_option_type::kernels_cache_dir. 
- build_option_type get_type() const override { return build_option_type::kernels_cache_dir; } - - build_option_kernels_cache_dir(const build_option_kernels_cache_dir& other) = delete; - build_option_kernels_cache_dir& operator=(const build_option_kernels_cache_dir& other) = delete; -}; - -/// @brief @ref build_option specialization for serialization process. -template -struct build_option_serialization : build_option { - const std::string serialization_network_name; - - explicit build_option_serialization(const std::string& name) : serialization_network_name(name) {} - -private: - build_option_type get_type() const override { return build_option_type::serialize_network; } - - build_option_serialization(const build_option_serialization& other) = delete; - build_option_serialization& operator=(const build_option_serialization& other) = delete; -}; - -/// @brief @ref build_option specialization for load_program process. -template -struct build_option_load_program : build_option { - const std::string load_program_name; - - explicit build_option_load_program(const std::string& name) : load_program_name(name) {} - -private: - build_option_type get_type() const override { return build_option_type::load_program; } - - build_option_load_program(const build_option_load_program& other) = delete; - build_option_load_program& operator=(const build_option_load_program& other) = delete; -}; - -struct build_option_force_implementations : build_option { - implementation_forcing_map forcing; - - explicit build_option_force_implementations(implementation_forcing_map _forcing) : forcing(std::move(_forcing)) {} -private: - build_option_type get_type() const override { return build_option_type::force_implementations; } - - build_option_force_implementations(const build_option_force_implementations& other) = delete; - build_option_force_implementations& operator=(const build_option_force_implementations& other) = delete; -}; - -namespace detail { -/// @brief Helper template to convert @ref build_option_type value to particular @ref build_option class. -template -struct build_option_traits { - /// @brief @ref build_option object type which represents the particular @p OptType. 
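For reference, a minimal sketch of how the removed build_options API above was typically driven, reconstructed from the declarations in this deleted header (engine and topology setup omitted); the rest of this patch replaces these calls with the property-based ExecutionConfig:

    cldnn::build_options bo(cldnn::build_option::optimize_data(true),
                            cldnn::build_option::outputs({"output"}));
    // get<OptType>() returns the stored option for OptType, or its default if it was never set
    bool optimize_data = bo.get<cldnn::build_option_type::optimize_data>()->enabled();
    auto output_ids    = bo.get<cldnn::build_option_type::outputs>()->outputs;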
- typedef build_option object_type; - /// @brief Make default @ref build_option corresponding @p OptType - static std::shared_ptr make_default(); -}; - -#ifndef DOXYGEN_SHOULD_SKIP_THIS -template <> -struct build_option_traits { - typedef build_option_bool object_type; - static std::shared_ptr make_default() { return build_option::fusing(); } -}; -template <> -struct build_option_traits { - typedef build_option_bool object_type; - static std::shared_ptr make_default() { return build_option::optimize_data(); } -}; -template <> -struct build_option_traits { - typedef build_option_bool object_type; - static std::shared_ptr make_default() { return build_option::allow_static_input_reorder(); } -}; -template <> -struct build_option_traits { - typedef build_option_bool object_type; - static std::shared_ptr make_default() { return build_option::debug(); } -}; -template <> -struct build_option_traits { - typedef build_option_outputs object_type; - static std::shared_ptr make_default() { return build_option::outputs({}); } -}; -template <> -struct build_option_traits { - typedef build_option_learning_config object_type; - static std::shared_ptr make_default() { return build_option::learning_config(); } -}; -template <> -struct build_option_traits { - typedef build_option_tuning_config object_type; - static std::shared_ptr make_default() { return build_option::tuning_config(); } -}; -template <> -struct build_option_traits { - typedef build_option_directory object_type; - static std::shared_ptr make_default() { return build_option::graph_dumps_dir({}); } -}; -template <> -struct build_option_traits { - typedef build_option_directory object_type; - static std::shared_ptr make_default() { return build_option::kernels_cache_dir({}); } -}; -template <> -struct build_option_traits { - typedef build_option_serialization object_type; - static std::shared_ptr make_default() { return build_option::serialize_network({}); } -}; -template <> -struct build_option_traits { - typedef build_option_load_program object_type; - static std::shared_ptr make_default() { return build_option::load_program({}); } -}; -template <> -struct build_option_traits { - using object_type = build_option_force_implementations; - static std::shared_ptr make_default() { return build_option::force_implementations({}); } -}; -template <> -struct build_option_traits { - typedef build_option_bool object_type; - static std::shared_ptr make_default() { return build_option::partial_build_program(); } -}; -template <> -struct build_option_traits { - typedef build_option_bool object_type; - static std::shared_ptr make_default() { return build_option::allow_new_shape_infer(); } -}; - -#endif -} // namespace detail - -#ifndef DOXYGEN_SHOULD_SKIP_THIS -inline std::shared_ptr build_option::fusing(bool enable) { - return std::make_shared>(enable); -} - -inline std::shared_ptr build_option::optimize_data(bool enable) { - return std::make_shared>(enable); -} - -inline std::shared_ptr build_option::allow_static_input_reorder(bool enable) { - return std::make_shared>(enable); -} - -inline std::shared_ptr build_option::debug(bool enable) { - return std::make_shared>(enable); -} - -inline std::shared_ptr build_option::outputs(const std::vector& outs) { - return std::make_shared(outs); -} - -inline std::shared_ptr build_option::learning_config(const learning_params& params) { - return std::make_shared(params); -} - -inline std::shared_ptr build_option::tuning_config(const tuning_config_options& config) { - return std::make_shared(config); -} - -inline 
std::shared_ptr build_option::graph_dumps_dir(const std::string& dir_path) { - return std::make_shared>(dir_path); -} - -inline std::shared_ptr build_option::kernels_cache_dir(const std::string& dir_path) { - return std::make_shared>(dir_path); -} -inline std::shared_ptr build_option::serialize_network(const std::string& name) { - return std::make_shared>(name); -} -inline std::shared_ptr build_option::load_program(const std::string& name) { - return std::make_shared>(name); -} -inline std::shared_ptr build_option::force_implementations(implementation_forcing_map forcing) { - return std::make_shared(std::move(forcing)); -} - -inline std::shared_ptr build_option::partial_build_program(bool enable) { - return std::make_shared>(enable); -} - -inline std::shared_ptr build_option::allow_new_shape_infer(bool enable) { - return std::make_shared>(enable); -} - -#endif - -/// @brief Represents program build options list. -class build_options { -public: - /// @brief Adds or replace option to the options list - void set_option(std::shared_ptr opt) { add_or_replace_option(opt); } - - /// @brief Adds or replace options to the options list - template - void set_option(std::shared_ptr opt, Args... args) { - add_or_replace_option(opt); - set_option(args...); - } - - /// @brief Constructs build options list from its arguments. - template - explicit build_options(Args... args) { - set_option(args...); - } - - /// @brief Returns program build option for @p OptType - template - std::shared_ptr::object_type> get() const { - using T = typename detail::build_option_traits::object_type; - for (auto& option : _options) { - if (option->get_type() == OptType) - return std::static_pointer_cast(option); - } - return std::static_pointer_cast(detail::build_option_traits::make_default()); - } - -private: - friend struct program; - std::vector> _options; - void set_option(void) {} - - void add_or_replace_option(std::shared_ptr opt) { - for (auto& p : _options) { - if (p->get_type() == opt->get_type()) { - p = opt; - return; - } - } - _options.push_back(opt); - } -}; - -/// @} -/// @} -} // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp index 7db717b64f7..9c7740db7b9 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp @@ -33,7 +33,7 @@ struct network_output { memory::ptr get_memory() const { // TODO: in_order queue doesn't create proper output event in some cases which leads to syncronization issues with user app // So call finish for associated stream to enusre that the output data is ready. 
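The hunk below (and many later call sites) renames the unscoped cldnn queue_types enum to the scoped QueueTypes. A sketch of the resulting synchronization logic in network_output::get_memory(), assuming the stream and event are obtained from the executed network as in the surrounding code:

    cldnn::stream& stream = network.get_stream();
    if (stream.get_queue_type() == cldnn::QueueTypes::in_order) {
        // in-order queues may not produce a usable output event, so block on the whole queue
        stream.finish();
    } else {
        // out-of-order queues synchronize on the primitive's own event
        output_event->wait();
    }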
- if (_stream->get_queue_type() == queue_types::in_order) { + if (_stream->get_queue_type() == QueueTypes::in_order) { _stream->finish(); } else { _event->wait(); @@ -67,14 +67,15 @@ public: }; using variables_states_map = std::map; - explicit network(program::ptr program, stream::ptr stream, bool is_internal = false, bool is_primary_stream = true); + explicit network(program::ptr program, const ExecutionConfig& config, stream::ptr stream, bool is_internal = false, bool is_primary_stream = true); network(engine& engine, const topology& topo, - const build_options& options = build_options(), + const ExecutionConfig& config = {}, bool is_internal = false); network(engine& engine, const std::set>& nodes, - const build_options& options, + const ExecutionConfig& config, + std::shared_ptr task_executor, bool is_internal); network(program::ptr program, uint16_t stream_id = 0); @@ -82,6 +83,7 @@ public: network(program::ptr program, stream::ptr stream, uint16_t stream_id); network(cldnn::BinaryInputBuffer& ifs, stream::ptr stream, engine& engine, uint16_t stream_id = 0); + network(cldnn::BinaryInputBuffer& ifs, const ExecutionConfig& config, stream::ptr stream, engine& engine, uint16_t stream_id = 0); ~network(); @@ -89,11 +91,12 @@ public: static ptr build_network(engine& engine, const topology& topology, - const build_options& options = build_options(), + const ExecutionConfig& config = {}, bool is_internal = false); static ptr build_network(engine& engine, const std::set>& nodes, - const build_options& options, + const ExecutionConfig& config, + std::shared_ptr task_executor, bool is_internal); static ptr allocate_network(stream::ptr stream, @@ -121,7 +124,7 @@ public: network_output get_output(const primitive_id& output_id) { event::ptr evt; - if (get_stream().get_queue_type() == queue_types::out_of_order) + if (get_stream().get_queue_type() == QueueTypes::out_of_order) evt = get_primitive_event(output_id); return network_output(evt, get_output_memory(output_id), get_stream_ptr()); } @@ -236,10 +239,13 @@ public: ICompilationContext& get_compilation_context() const { return *_compilation_context; } std::mutex& get_impl_cache_mutex() const { return _in_mem_cache_mutex; } + const ExecutionConfig& get_config() const { return _config; } + private: using output_chains_map = std::map>>; uint32_t net_id = 0; program::ptr _program; + ExecutionConfig _config; engine& _engine; stream::ptr _stream; std::unique_ptr _memory_pool; diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp index a3a8a95f275..7ba9d4a2094 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp @@ -7,7 +7,7 @@ #include "intel_gpu/runtime/engine.hpp" #include "intel_gpu/runtime/stream.hpp" #include "intel_gpu/runtime/lru_cache.hpp" -#include "build_options.hpp" +#include "intel_gpu/runtime/execution_config.hpp" #include #include @@ -126,19 +126,22 @@ public: program(engine& engine_ref, topology const& topology, - build_options const& options, + const ExecutionConfig& config, bool is_internal = false, bool no_optimizations = false, bool is_body_program = false); - /* constructor used to build a program from subset of nodes of other program (used in propagate_constants) */ + program(engine& engine_ref, std::set> const& nodes, - build_options const& options, + const ExecutionConfig& config, + std::shared_ptr task_executor, bool is_internal); + explicit program(engine& engine); 
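With build_options removed, cldnn::program and cldnn::network are now configured through the ExecutionConfig seen in the signatures above. A minimal sketch of the updated call pattern, assuming ExecutionConfig exposes ov::Property-style set_property (its definition lives in the new execution_config.hpp, which is not shown in this hunk), and treating ov::intel_gpu::optimize_data as an assumed internal property mirroring the removed build_option of the same name:

    ExecutionConfig config;                                    // replaces cldnn::build_options
    config.set_property(ov::enable_profiling(false));
    config.set_property(ov::intel_gpu::optimize_data(true));  // assumed internal property

    // New signatures from this patch: the config is passed where build_options used to go.
    cldnn::program::ptr prog = cldnn::program::build_program(engine, topology, config);
    cldnn::network::ptr net  = cldnn::network::build_network(engine, topology, config);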
~program(); engine& get_engine() const { return _engine; } - const build_options& get_options() const { return options; } + const ExecutionConfig& get_config() const { return _config; } + InferenceEngine::CPUStreamsExecutor::Ptr get_task_executor() const { return _task_executor; } std::list& get_inputs() { return inputs; } // ToDo: redesign trim to ouptut pass to make it const as_well as get_engine and get options @@ -146,7 +149,6 @@ public: return outputs; } // ToDo: redesign reorder-inputs pass to make it const as_well as get_engine and get options bool is_loop_body() const { return is_body_program; } - bool is_debug_build() const { return options.get()->enabled(); } const nodes_ordering& get_processing_order() const; nodes_ordering& get_processing_order(); uint32_t get_prog_id() { return prog_id; } @@ -230,13 +232,14 @@ public: static ptr build_program(engine& engine, const topology& topology, - const build_options& options, + const ExecutionConfig& config, bool is_internal = false, bool no_optimizations = false, bool is_body_program = false); static ptr build_program(engine& engine, const std::set>& nodes, - const build_options& options, + const ExecutionConfig& config, + std::shared_ptr task_executor, bool is_internal); static void init_primitives(); void compile(); @@ -261,7 +264,8 @@ private: stream::ptr _stream; // TODO: Consider moving it to engine std::unique_ptr _kernels_cache; - build_options options; + ExecutionConfig _config; + std::shared_ptr _task_executor = nullptr; std::list inputs; std::vector outputs; nodes_ordering processing_order; @@ -308,6 +312,7 @@ private: void cleanup(); void transfer_memory_to_device(); + std::shared_ptr make_task_executor(const ExecutionConfig& config) const; /* ** Analysis functions */ diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp index ab82970a24d..2bbb634e0b3 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp @@ -14,8 +14,8 @@ #include "cpp/ie_cnn_network.h" #include #include "intel_gpu/plugin/graph.hpp" -#include "intel_gpu/plugin/device_config.hpp" #include "intel_gpu/plugin/remote_context.hpp" +#include "intel_gpu/runtime/execution_config.hpp" namespace ov { namespace intel_gpu { @@ -24,8 +24,8 @@ class CompiledModel : public InferenceEngine::ExecutableNetworkThreadSafeDefault public: typedef std::shared_ptr Ptr; - CompiledModel(InferenceEngine::CNNNetwork &network, std::shared_ptr context, Config config); - CompiledModel(std::istream& networkModel, std::shared_ptr context, Config config); + CompiledModel(InferenceEngine::CNNNetwork &network, InferenceEngine::RemoteContext::Ptr context, const ExecutionConfig& config); + CompiledModel(std::istream& networkModel, InferenceEngine::RemoteContext::Ptr context, const ExecutionConfig& config); void Export(std::ostream& networkModel) override; std::shared_ptr GetExecGraphInfo() override; @@ -42,8 +42,8 @@ public: std::shared_ptr GetContext() const override; std::vector> m_graphs; - InferenceEngine::gpu::ClContext::Ptr m_context; - Config m_config; + InferenceEngine::RemoteContext::Ptr m_context; + ExecutionConfig m_config; InferenceEngine::ITaskExecutor::Ptr m_taskExecutor; InferenceEngine::ITaskExecutor::Ptr m_waitExecutor; diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/device_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/device_config.hpp deleted file mode 100644 index 
364f1159238..00000000000 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/device_config.hpp +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include - -#include "intel_gpu/plugin/custom_layer.hpp" -#include "intel_gpu/runtime/debug_configuration.hpp" -#include "intel_gpu/graph/network.hpp" -#include "openvino/runtime/intel_gpu/properties.hpp" -#include -#include - -namespace ov { -namespace intel_gpu { - - -struct Config { - Config(std::string device_id = "0") : device_id(device_id), - throughput_streams(1), - useProfiling(false), - dumpCustomKernels(false), - exclusiveAsyncRequests(false), - enableDynamicBatch(false), - enableInt8(true), - nv12_two_inputs(false), - queuePriority(cldnn::priority_mode_types::med), - queueThrottle(cldnn::throttle_mode_types::med), - max_dynamic_batch(1), - customLayers({}), - kernels_cache_dir(""), - inference_precision(ov::element::f16), - task_exec_config({"GPU plugin internal task executor", // name - std::max(1, static_cast(std::thread::hardware_concurrency())), // # of streams - 1, // # of threads per streams - InferenceEngine::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, // thread binding type - 1, // thread binding step - 0, // thread binding offset - 1, // # of threads - InferenceEngine::IStreamsExecutor::Config::ANY}), // preferred core type - enable_loop_unrolling(true) { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->serialize_compile == 1) { - task_exec_config._streams = 1; - } - - adjustKeyMapValues(); - } - - uint32_t GetDefaultNStreamsForThroughputMode() const { - return 2; - } - void UpdateFromMap(const std::map& configMap, const cldnn::device_info& info); - void adjustKeyMapValues(); - static bool isNewApiProperty(std::string property); - static std::string ConvertPropertyToLegacy(const std::string& key, const std::string& value); - - bool CanShareContextWith(const Config& other) const; - - std::string device_id; - uint16_t throughput_streams; - bool useProfiling; - bool dumpCustomKernels; - bool exclusiveAsyncRequests; - bool enableDynamicBatch; - bool enableInt8; - bool nv12_two_inputs; - cldnn::priority_mode_types queuePriority; - cldnn::throttle_mode_types queueThrottle; - int max_dynamic_batch; - CustomLayerMap customLayers; - std::string kernels_cache_dir; - ov::element::Type inference_precision; - InferenceEngine::IStreamsExecutor::Config task_exec_config; - - bool enable_loop_unrolling; - - std::map key_config_map; - InferenceEngine::PerfHintsConfig perfHintsConfig; -}; - -struct Configs { - using conf_iter = std::map::iterator; - Configs(Config conf = Config()) : configs({std::make_pair(default_device_id, conf.device_id = default_device_id)}) { } - - void CreateConfig(std::string device_id); - Config& GetConfig(std::string device_id); - Config& GetDefaultDeviceConfig(); - - void SetDefaultDeviceID(std::string default_device_id) { this->default_device_id = default_device_id; } - std::string GetDefaultDeviceID() { return default_device_id; } - - conf_iter begin() { return configs.begin(); } - conf_iter end() { return configs.end(); } - -private: - std::string default_device_id = "0"; - std::map configs; -}; - -} // namespace intel_gpu -} // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp index 47c2e99ac00..f49a1c0e681 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp +++ 
b/src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp @@ -23,8 +23,8 @@ #include #include "intel_gpu/plugin/custom_layer.hpp" -#include "intel_gpu/plugin/device_config.hpp" #include "intel_gpu/plugin/remote_context.hpp" +#include "intel_gpu/plugin/remote_blob.hpp" #include "intel_gpu/plugin/program.hpp" namespace ov { @@ -40,8 +40,11 @@ public: typedef std::shared_ptr Ptr; using variable_states_map = std::map>; - Graph(InferenceEngine::CNNNetwork& network, InferenceEngine::gpu::ClContext::Ptr context, Config config, uint16_t stream_id = 0); - Graph(cldnn::BinaryInputBuffer& ib, InferenceEngine::gpu::ClContext::Ptr context, Config config, uint16_t stream_id = 0); + Graph(InferenceEngine::CNNNetwork& network, + RemoteContextImpl::Ptr context, + const ExecutionConfig& config, + uint16_t stream_id = 0); + Graph(cldnn::BinaryInputBuffer& ib, RemoteContextImpl::Ptr context, const ExecutionConfig& config, uint16_t stream_id = 0); explicit Graph(std::shared_ptr graph, uint16_t stream_id = 0); void Export(cldnn::BinaryOutputBuffer &ob); std::shared_ptr GetExecGraphInfo(); @@ -51,10 +54,10 @@ public: std::map GetPerformanceCounts() const; void UpdatePerfStatistics(); - const Config& getConfig() const { return m_config; } - InferenceEngine::gpu::ClContext::Ptr GetContext() { return m_context; } - std::shared_ptr GetEngine() const { return getContextImpl(m_context)->GetEngine(); } - int GetMaxDynamicBatchSize() const { return getConfig().max_dynamic_batch; } + cldnn::engine& get_engine() const { return m_context->get_engine(); } + const ExecutionConfig& get_config() const { return m_config; } + + int GetMaxDynamicBatchSize() const { return m_config.get_property(ov::intel_gpu::max_dynamic_batch); } const std::map& GetInputLayouts() const { return m_program->GetInputLayouts(); } const InferenceEngine::InputsDataMap GetNetworkInputs() const { return m_program->GetNetworkInputs(); } const InferenceEngine::OutputsDataMap GetNetworkOutputs() const { return m_program->GetNetworkOutputs(); } @@ -85,16 +88,15 @@ public: bool use_external_queue() const; protected: - InferenceEngine::gpu::ClContext::Ptr m_context; + RemoteContextImpl::Ptr m_context; std::shared_ptr m_program; std::string m_networkName; - Config m_config; + ExecutionConfig m_config; uint16_t m_stream_id; uint32_t m_state; std::condition_variable m_cv; std::mutex m_infer_mutex; - std::vector> m_networks; std::map primitiveIDs; std::map> prevPrimitiveIDs; @@ -104,7 +106,6 @@ protected: std::map outputDims; - std::shared_ptr BuildNetwork(std::shared_ptr program); void Build(); void UpdateLayersMaps(); diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/infer_request.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/infer_request.hpp index 64e62d3853e..483dbfc3a05 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/infer_request.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/infer_request.hpp @@ -70,6 +70,7 @@ private: bool m_useStreams = false; bool m_useExternalQueue = false; std::shared_ptr m_graph; + InferenceEngine::gpu::ClContext::Ptr m_context = nullptr; InferenceEngine::IStreamsExecutor* streamExecutor = nullptr; @@ -90,7 +91,7 @@ private: template::value || std::is_same::value>::type> InferenceEngine::Blob::Ptr create_remote_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout, - const RemoteBlobImpl::BlobType mem_type, void* mem_ptr = nullptr); + const BlobType mem_type, void* mem_ptr = nullptr); InferenceEngine::Blob::Ptr create_shared_device_blob(const InferenceEngine::TensorDesc& 
desc, const cldnn::layout& layout, void* usm_host_mem); void allocate_inputs(); void allocate_outputs(); diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/infer_request_legacy.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/infer_request_legacy.hpp index 809bbe36835..62357e680cb 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/infer_request_legacy.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/infer_request_legacy.hpp @@ -79,6 +79,7 @@ private: bool m_useStreams = false; bool m_useExternalQueue = false; std::shared_ptr m_graph; + InferenceEngine::gpu::ClContext::Ptr m_context = nullptr; // dynamic batch stuff std::map> batchInputs; diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/internal_properties.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/internal_properties.hpp deleted file mode 100644 index f367f351cc3..00000000000 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/internal_properties.hpp +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "openvino/runtime/properties.hpp" - -namespace ov { -namespace intel_gpu { - -/** - * @brief Read-only property to get GPU driver version - */ -static constexpr Property driver_version{"GPU_DRIVER_VERSION"}; - -/** - * @brief Read-only property to get GPU driver version - */ -static constexpr Property device_id{"GPU_DEVICE_ID"}; - -} // namespace intel_gpu -} // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/legacy_api_helper.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/legacy_api_helper.hpp new file mode 100644 index 00000000000..769cafde43d --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/legacy_api_helper.hpp @@ -0,0 +1,23 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "intel_gpu/runtime/execution_config.hpp" + +namespace ov { +namespace intel_gpu { + +class LegacyAPIHelper { +public: + static ov::AnyMap convert_legacy_properties(const std::map& properties, bool is_new_api); + static ov::AnyMap convert_legacy_properties(const ov::AnyMap& properties, bool is_new_api); + static std::pair convert_legacy_property(const std::pair& legacy_property); + static std::pair convert_to_legacy_property(const std::pair& property); + static bool is_legacy_property(const std::pair& property, bool is_new_api); + static bool is_new_api_property(const std::pair& property); + static std::vector get_supported_configs(); + static std::vector get_supported_metrics(bool model_caching_enabled); +}; + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp index 4d6bf160e0c..6fbdb55e4ea 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp @@ -15,34 +15,37 @@ namespace ov { namespace intel_gpu { -using CustomLayerPtr = std::shared_ptr; - -class Plugin : public InferenceEngine::IInferencePlugin, - public InferenceEngine::gpu::details::param_map_obj_getter { +class Plugin : public InferenceEngine::IInferencePlugin { struct impl; std::shared_ptr _impl; - bool streamsSet = false; - bool throttlingSet = false; bool isModelCachingEnabled = false; + std::string default_device_id = "0"; // key: device_id, value: cldnn device std::map device_map; + std::map m_configs_map; // key: cldnn context, value: memory statistics - mutable 
std::map> statistics_map; + mutable std::map> statistics_map; mutable std::mutex engine_mutex; - mutable std::map m_defaultContexts; + mutable std::map m_default_contexts; - cldnn::device_info GetDeviceInfo(const std::map &config) const; - InferenceEngine::CNNNetwork CloneAndTransformNetwork(const InferenceEngine::CNNNetwork& network, - const Config& config) const; - void TransformNetwork(std::shared_ptr& model, const Config& config) const; - std::map ConvertPerfHintsToConfig(const std::map& network_config, - const Config& plugin_config) const; + InferenceEngine::CNNNetwork clone_and_transform_model(const InferenceEngine::CNNNetwork& network, + const ExecutionConfig& config) const; + void transform_model(std::shared_ptr& model, const ExecutionConfig& config) const; + void register_primitives(); + void update_memory_statistics(const RemoteContextImpl::Ptr& context) const; + std::string get_device_id_from_config(const std::map& config) const; + std::string get_device_id(const std::map& config) const; + RemoteCLContext::Ptr get_default_context(const std::string& device_id) const; + + std::vector get_supported_properties() const; + std::vector get_device_capabilities(const cldnn::device_info& info) const; + uint32_t get_optimal_batch_size(const std::map& options) const; + uint32_t get_max_batch_size(const std::map& options) const; + + ov::AnyMap preprocess_config(const std::map& orig_config) const; - void RegisterPrimitives(); - void UpdateConfig(Config& conf, const InferenceEngine::CNNNetwork &network, const std::map ¶ms) const; - void UpdateStatistics(const RemoteCLContext::Ptr& context) const; public: Plugin(); @@ -54,7 +57,6 @@ public: const std::map &config) override; void SetConfig(const std::map &config) override; - std::string GetDeviceIDFromConfig(const std::map& config) const; InferenceEngine::Parameter GetConfig(const std::string& name, const std::map& options) const override; InferenceEngine::Parameter GetMetric(const std::string& name, const std::map& options) const override; InferenceEngine::QueryNetworkResult QueryNetwork(const InferenceEngine::CNNNetwork& network, @@ -64,31 +66,6 @@ public: std::shared_ptr CreateContext(const InferenceEngine::ParamMap& params) override; std::shared_ptr GetDefaultContext(const InferenceEngine::ParamMap& params) override; - - struct PluginParams { - cldnn::queue_types queue_type; - cldnn::engine_types engine_type; - cldnn::runtime_types runtime_type; - bool use_unified_shared_memory; - InferenceEngine::ITaskExecutor::Ptr task_executor; - }; - - static PluginParams GetParams(const Config& config, const cldnn::device::ptr& dev, - InferenceEngine::gpu_handle_param external_queue = nullptr) { - PluginParams params; - params.engine_type = cldnn::engine_types::ocl; - params.runtime_type = cldnn::runtime_types::ocl; - if (external_queue) { - params.queue_type = cldnn::stream::detect_queue_type(params.engine_type, external_queue); - } else if (dev->get_info().supports_immad) { - params.queue_type = cldnn::queue_types::in_order; - } else { - params.queue_type = cldnn::queue_types::out_of_order; - } - params.use_unified_shared_memory = true; - params.task_executor = std::make_shared(config.task_exec_config); - return params; - } }; } // namespace intel_gpu diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/program.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/program.hpp index 4fb926009bf..a3875d49494 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/program.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/program.hpp @@ 
-14,11 +14,14 @@ #include #include +#include "gpu/gpu_config.hpp" -#include "intel_gpu/plugin/device_config.hpp" +#include "intel_gpu/plugin/custom_layer.hpp" #include "intel_gpu/runtime/engine.hpp" +#include "intel_gpu/runtime/execution_config.hpp" #include "intel_gpu/graph/topology.hpp" +#include "intel_gpu/graph/program.hpp" // Forward declarations for cldnn part namespace cldnn { @@ -78,20 +81,14 @@ public: class Program { public: - Program(InferenceEngine::CNNNetwork& network, std::shared_ptr engine, const Config& config, + Program(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, const ExecutionConfig& config, bool createTopologyOnly = false, bool partialBuild = false); - Program(std::shared_ptr engine, const Config& config) + Program(cldnn::engine& engine, const ExecutionConfig& config) : m_max_batch(1) , m_curBatch(-1) , m_config(config) , m_engine(engine) , queryMode(false) {} - Program() - : m_max_batch(1) - , m_curBatch(-1) - , m_config() - , m_engine(nullptr) - , queryMode(false) {} static const cldnn::primitive_id m_preProcessTag; static const cldnn::primitive_id m_meanValuesTag; @@ -109,6 +106,7 @@ public: std::map inputLayouts; using BlobCacheKey = std::pair>; std::map blobMemCache; + CustomLayerMap m_custom_layers; int m_max_batch; int m_curBatch; @@ -119,9 +117,8 @@ public: const std::map& GetInputLayouts() const { return inputLayouts; } InferenceEngine::InputsDataMap GetNetworkInputs() const { return m_networkInputs; } InferenceEngine::OutputsDataMap GetNetworkOutputs() const { return m_networkOutputs; } - cldnn::engine& GetEngine() const { return *m_engine; } - std::shared_ptr GetEnginePtr() const { return m_engine; } - const Config& GetConfig() const { return m_config; } + cldnn::engine& get_engine() const { return m_engine; } + const ExecutionConfig& get_config() const { return m_config; } int GetMaxBatchSizeForSingleProgram(); bool IsOpSupported(const InferenceEngine::CNNNetwork& network, const std::shared_ptr& op); @@ -166,8 +163,8 @@ public: private: static factories_map_t factories_map; std::vector> m_programs; - Config m_config; - std::shared_ptr m_engine; + ExecutionConfig m_config; + cldnn::engine& m_engine; std::shared_ptr m_topology; InferenceEngine::InputsDataMap m_networkInputs; diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_allocators.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_allocators.hpp new file mode 100644 index 00000000000..ffbbf1a0f1f --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_allocators.hpp @@ -0,0 +1,99 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "intel_gpu/plugin/remote_context.hpp" + +#include +#include +#include +#include + +namespace ov { +namespace intel_gpu { + +class RemoteBlobImpl; + +class RemoteAllocator : public InferenceEngine::IAllocator { +protected: + friend class RemoteBlobImpl; + std::atomic_flag _lock; + std::map m_lockedBlobs; + + void regLockedBlob(void* handle, const RemoteBlobImpl* blob); + +public: + using Ptr = std::shared_ptr; + + RemoteAllocator() { _lock.clear(std::memory_order_relaxed); } + /** + * @brief Maps handle to heap memory accessible by any memory manipulation routines. + * @return Generic pointer to memory + */ + void* lock(void* handle, InferenceEngine::LockOp = InferenceEngine::LOCK_FOR_WRITE) noexcept override { return handle; }; + /** + * @brief Unmaps memory by handle with multiple sequential mappings of the same handle. 
+ * The multiple sequential mappings of the same handle are suppose to get the same + * result while there isn't a ref counter supported. + */ + void unlock(void* handle) noexcept override; + /** + * @brief Allocates memory + * @param size The size in bytes to allocate + * @return Handle to the allocated resource + */ + void* alloc(size_t size) noexcept override { return nullptr; } + /** + * @brief Releases handle and all associated memory resources which invalidates the handle. + * @return false if handle cannot be released, otherwise - true. + */ + bool free(void* handle) noexcept override { return true; } + + void lock() { + while (_lock.test_and_set(std::memory_order_acquire)) {} + } + + void unlock() { + _lock.clear(std::memory_order_release); + } +}; + +class USMHostAllocator : public InferenceEngine::IAllocator { +protected: + InferenceEngine::gpu::USMBlob::Ptr _usm_host_blob = nullptr; + InferenceEngine::gpu::ClContext::Ptr _context = nullptr; + +public: + using Ptr = std::shared_ptr; + + USMHostAllocator(InferenceEngine::gpu::ClContext::Ptr context) : _context(context) { } + /** + * @brief Maps handle to heap memory accessible by any memory manipulation routines. + * @return Generic pointer to memory + */ + void* lock(void* handle, InferenceEngine::LockOp = InferenceEngine::LOCK_FOR_WRITE) noexcept override; + + /** + * @brief Unmaps memory by handle with multiple sequential mappings of the same handle. + * The multiple sequential mappings of the same handle are suppose to get the same + * result while there isn't a ref counter supported. + */ + void unlock(void* handle) noexcept override; + + /** + * @brief Allocates memory + * @param size The size in bytes to allocate + * @return Handle to the allocated resource + */ + void* alloc(size_t size) noexcept override; + /** + * @brief Releases handle and all associated memory resources which invalidates the handle. + * @return false if handle cannot be released, otherwise - true. 
+ */ + bool free(void* handle) noexcept override; +}; + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_blob.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_blob.hpp new file mode 100644 index 00000000000..a44b94a22fc --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_blob.hpp @@ -0,0 +1,171 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "intel_gpu/runtime/memory.hpp" +#include "intel_gpu/runtime/engine.hpp" +#include "intel_gpu/plugin/common_utils.hpp" + +#ifndef NOMINMAX +# define NOMINMAX +#endif + +#ifndef OV_GPU_USE_OPENCL_HPP +#define OV_GPU_USE_OPENCL_HPP +#endif + +#ifdef _WIN32 +# include +#else +# include +#endif + +#include +#include +#include + +namespace ov { +namespace intel_gpu { +class RemoteContextImpl; + +class RemoteBlobImpl : public InferenceEngine::gpu::details::param_map_obj_getter { + friend class RemoteAllocator; +public: + explicit RemoteBlobImpl(InferenceEngine::gpu::ClContext::Ptr context, + cldnn::stream& stream, + const cldnn::layout& layout, + cldnn::shared_handle mem = nullptr, + cldnn::shared_surface surf = 0, + uint32_t plane = 0, + BlobType mem_type = BlobType::BT_BUF_INTERNAL); + + void allocate(); + bool deallocate() noexcept; + InferenceEngine::ParamMap getParams() const; + std::string getDeviceName() const noexcept; + std::shared_ptr getContext() const noexcept; + InferenceEngine::LockedMemory buffer() noexcept; + InferenceEngine::LockedMemory cbuffer() const noexcept; + InferenceEngine::LockedMemory rwmap() noexcept; + InferenceEngine::LockedMemory rmap() const noexcept; + InferenceEngine::LockedMemory wmap() noexcept; + const std::shared_ptr &getAllocator() const noexcept; + void *getHandle() const noexcept { return _handle; } + + void reinterpret(cldnn::layout new_layout); + + bool is_allocated() const noexcept; + bool is_locked() const noexcept; + cldnn::memory::ptr get_memory() { return m_memory_object; } + +protected: + std::shared_ptr m_allocator; + InferenceEngine::gpu::ClContext::Ptr m_context; + cldnn::stream& m_stream; + + // constructor stuff + cldnn::shared_handle m_mem; + cldnn::shared_surface m_surf; + + uint32_t m_plane; + cldnn::layout m_layout; + BlobType m_mem_type; + size_t m_hash; + + cldnn::memory::ptr m_memory_object; + + mutable std::mutex lockedMutex; + mutable size_t lockedCounter; + mutable std::unique_ptr> lockedHolder; + mutable void* _handle; + + void lock() const; + void unlock() const; + + bool supports_caching() const; +}; + +template +class TypedRemoteBlob : public TpublicAPI { +public: + using Ptr = std::shared_ptr; + + explicit TypedRemoteBlob(InferenceEngine::gpu::ClContext::Ptr context, + cldnn::stream& stream, + const InferenceEngine::TensorDesc& desc, + const cldnn::layout& layout, + cldnn::shared_handle mem = nullptr, + cldnn::shared_surface surf = 0, + uint32_t plane = 0, + BlobType mem_type = BlobType::BT_BUF_INTERNAL) + : TpublicAPI(desc) + , _impl(context, stream, layout, mem, surf, plane, mem_type) {} + + void allocate() noexcept override { + try { + if (!_impl.is_allocated()) + _impl.allocate(); + } catch (...) 
{} + } + bool deallocate() noexcept override { return _impl.deallocate(); } + InferenceEngine::ParamMap getParams() const override { return _impl.getParams(); } + std::string getDeviceName() const noexcept override { return _impl.getDeviceName(); } + std::shared_ptr getContext() const noexcept override { return _impl.getContext(); } + InferenceEngine::LockedMemory buffer() noexcept override { return _impl.buffer(); } + InferenceEngine::LockedMemory cbuffer() const noexcept override { return _impl.cbuffer(); } + InferenceEngine::LockedMemory rwmap() noexcept override { return _impl.rwmap(); } + InferenceEngine::LockedMemory rmap() const noexcept override { return _impl.rmap(); } + InferenceEngine::LockedMemory wmap()noexcept override { return _impl.wmap(); } + RemoteBlobImpl* getImpl() { return &_impl; } + +protected: + const std::shared_ptr &getAllocator() const noexcept override { return _impl.getAllocator(); } + void *getHandle() const noexcept override { return _impl.getHandle(); } + RemoteBlobImpl _impl; +}; + +using RemoteCLbuffer = TypedRemoteBlob; +using RemoteUSMbuffer = TypedRemoteBlob; +using RemoteCLImage2D = TypedRemoteBlob; +#ifdef _WIN32 +using RemoteD3DBuffer = TypedRemoteBlob; +using RemoteD3DSurface = TypedRemoteBlob; +#else +using RemoteVASurface = TypedRemoteBlob; +#endif + +inline RemoteBlobImpl* getBlobImpl(InferenceEngine::gpu::ClBlob* blobPtr) { +#ifdef _WIN32 + { + auto ptr = blobPtr->as(); + if (ptr) return ptr->getImpl(); + } + { + auto ptr = blobPtr->as(); + if (ptr) return ptr->getImpl(); + } +#else + { + auto ptr = blobPtr->as(); + if (ptr) return ptr->getImpl(); + } +#endif + { + auto ptr = blobPtr->as(); + if (ptr) return ptr->getImpl(); + } + { + auto ptr = blobPtr->as(); + if (ptr) return ptr->getImpl(); + } + { + auto ptr = blobPtr->as(); + if (ptr) return ptr->getImpl(); + } + return nullptr; +} + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp index 767ccb01ddf..e1ef70c7546 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp @@ -6,7 +6,7 @@ #include "intel_gpu/runtime/memory.hpp" #include "intel_gpu/runtime/engine.hpp" -#include "intel_gpu/plugin/device_config.hpp" +#include "intel_gpu/runtime/lru_cache.hpp" #include "intel_gpu/plugin/common_utils.hpp" #include @@ -35,559 +35,150 @@ namespace ov { namespace intel_gpu { -class RemoteAllocator; -class RemoteBlobImpl : public InferenceEngine::gpu::details::param_map_obj_getter { - friend class RemoteAllocator; -public: - enum BlobType { - BT_EMPTY, - BT_BUF_INTERNAL, - BT_BUF_SHARED, - BT_USM_SHARED, - BT_USM_HOST_INTERNAL, - BT_USM_DEVICE_INTERNAL, - BT_IMG_SHARED, - BT_SURF_SHARED, - BT_DX_BUF_SHARED, - }; - - explicit RemoteBlobImpl(InferenceEngine::gpu::ClContext::Ptr context, - cldnn::stream& stream, - const cldnn::layout& layout, - cldnn::shared_handle mem = nullptr, - cldnn::shared_surface surf = 0, - uint32_t plane = 0, - BlobType mem_type = BT_BUF_INTERNAL); - - void allocate(); - bool deallocate() noexcept; - InferenceEngine::ParamMap getParams() const; - std::string getDeviceName() const noexcept; - std::shared_ptr getContext() const noexcept; - InferenceEngine::LockedMemory buffer() noexcept; - InferenceEngine::LockedMemory cbuffer() const noexcept; - InferenceEngine::LockedMemory rwmap() noexcept; - InferenceEngine::LockedMemory rmap() const noexcept; - 
InferenceEngine::LockedMemory wmap() noexcept; - const std::shared_ptr &getAllocator() const noexcept; - void *getHandle() const noexcept { return _handle; } - - void reinterpret(cldnn::layout new_layout); - - bool is_allocated() const noexcept; - bool is_locked() const noexcept; - cldnn::memory::ptr getMemory() { return m_memObject; } - -protected: - static RemoteAllocator m_allocator; - std::weak_ptr m_context; - // retain engine ptr to ensure that memory object can be released properly in cases when RemoteContext if deleted before RemoteTensor - std::shared_ptr m_engine; - cldnn::stream& m_stream; - - // constructor stuff - cldnn::shared_handle m_mem; - cldnn::shared_surface m_surf; - - uint32_t m_plane; - cldnn::layout m_layout; - BlobType m_mem_type; - - cldnn::memory::ptr m_memObject; - - mutable std::mutex lockedMutex; - mutable size_t lockedCounter; - mutable std::unique_ptr> lockedHolder; - mutable void* _handle; - mutable std::shared_ptr _allocator; - - void lock() const; - void unlock() const; +enum class BlobType { + BT_EMPTY, + BT_BUF_INTERNAL, + BT_BUF_SHARED, + BT_USM_SHARED, + BT_USM_HOST_INTERNAL, + BT_USM_DEVICE_INTERNAL, + BT_IMG_SHARED, + BT_SURF_SHARED, + BT_DX_BUF_SHARED, }; -template -class TypedRemoteBlob : public TpublicAPI { -public: - using Ptr = std::shared_ptr; - - explicit TypedRemoteBlob(InferenceEngine::gpu::ClContext::Ptr context, - cldnn::stream& stream, - const InferenceEngine::TensorDesc& desc, - const cldnn::layout& layout, - cldnn::shared_handle mem = nullptr, - cldnn::shared_surface surf = 0, - uint32_t plane = 0, - RemoteBlobImpl::BlobType mem_type = RemoteBlobImpl::BlobType::BT_BUF_INTERNAL) - : TpublicAPI(desc) - , _impl(context, stream, layout, mem, surf, plane, mem_type) {} - - void allocate() noexcept override { - try { - if (!_impl.is_allocated()) - _impl.allocate(); - } catch (...) 
{} - } - bool deallocate() noexcept override { return _impl.deallocate(); } - InferenceEngine::ParamMap getParams() const override { return _impl.getParams(); } - std::string getDeviceName() const noexcept override { return _impl.getDeviceName(); } - std::shared_ptr getContext() const noexcept override { return _impl.getContext(); } - InferenceEngine::LockedMemory buffer() noexcept override { return _impl.buffer(); } - InferenceEngine::LockedMemory cbuffer() const noexcept override { return _impl.cbuffer(); } - InferenceEngine::LockedMemory rwmap() noexcept override { return _impl.rwmap(); } - InferenceEngine::LockedMemory rmap() const noexcept override { return _impl.rmap(); } - InferenceEngine::LockedMemory wmap()noexcept override { return _impl.wmap(); } - RemoteBlobImpl* getImpl() { return &_impl; } - -protected: - const std::shared_ptr &getAllocator() const noexcept override { return _impl.getAllocator(); } - void *getHandle() const noexcept override { return _impl.getHandle(); } - RemoteBlobImpl _impl; -}; - -using RemoteCLbuffer = TypedRemoteBlob; -using RemoteUSMbuffer = TypedRemoteBlob; -using RemoteCLImage2D = TypedRemoteBlob; -#ifdef _WIN32 -using RemoteD3DBuffer = TypedRemoteBlob; -using RemoteD3DSurface = TypedRemoteBlob; -#else -using RemoteVASurface = TypedRemoteBlob; -#endif - -inline RemoteBlobImpl* getBlobImpl(InferenceEngine::gpu::ClBlob* blobPtr) { -#ifdef _WIN32 - { - auto ptr = blobPtr->as(); - if (ptr) return ptr->getImpl(); - } - { - auto ptr = blobPtr->as(); - if (ptr) return ptr->getImpl(); - } -#else - { - auto ptr = blobPtr->as(); - if (ptr) return ptr->getImpl(); - } -#endif - { - auto ptr = blobPtr->as(); - if (ptr) return ptr->getImpl(); - } - { - auto ptr = blobPtr->as(); - if (ptr) return ptr->getImpl(); - } - { - auto ptr = blobPtr->as(); - if (ptr) return ptr->getImpl(); - } - return nullptr; +template +Result extract_object(const InferenceEngine::ParamMap& params, const std::string& key) { + auto itrHandle = params.find(key); + OPENVINO_ASSERT(itrHandle != params.end(), "[GPU] No parameter ", key, " found in ParamsMap"); + return itrHandle->second.as(); } -class RemoteAllocator : public InferenceEngine::IAllocator { -protected: - friend class RemoteBlobImpl; - std::atomic_flag _lock; - std::map m_lockedBlobs; - - void regLockedBlob(void* handle, const RemoteBlobImpl* blob); - -public: - using Ptr = std::shared_ptr; - - RemoteAllocator() { _lock.clear(std::memory_order_relaxed); } - /** - * @brief Maps handle to heap memory accessible by any memory manipulation routines. - * @return Generic pointer to memory - */ - void* lock(void* handle, InferenceEngine::LockOp = InferenceEngine::LOCK_FOR_WRITE) noexcept override { return handle; }; - /** - * @brief Unmaps memory by handle with multiple sequential mappings of the same handle. - * The multiple sequential mappings of the same handle are suppose to get the same - * result while there isn't a ref counter supported. - */ - void unlock(void* handle) noexcept override; - /** - * @brief Allocates memory - * @param size The size in bytes to allocate - * @return Handle to the allocated resource - */ - void* alloc(size_t size) noexcept override { return nullptr; } - /** - * @brief Releases handle and all associated memory resources which invalidates the handle. - * @return false if handle cannot be released, otherwise - true. 
- */ - bool free(void* handle) noexcept override { return true; } - - void lock() { - while (_lock.test_and_set(std::memory_order_acquire)) {} - } - - void unlock() { - _lock.clear(std::memory_order_release); - } -}; - -class USMHostAllocator : public InferenceEngine::IAllocator { -protected: - InferenceEngine::gpu::USMBlob::Ptr _usm_host_blob = nullptr; - InferenceEngine::gpu::ClContext* _context = nullptr; - -public: - using Ptr = std::shared_ptr; - - USMHostAllocator(InferenceEngine::gpu::ClContext* context) : _context(context) { } - /** - * @brief Maps handle to heap memory accessible by any memory manipulation routines. - * @return Generic pointer to memory - */ - void* lock(void* handle, InferenceEngine::LockOp = InferenceEngine::LOCK_FOR_WRITE) noexcept override { - if (!_usm_host_blob) - return nullptr; - try { - return _usm_host_blob->get(); - } catch (...) { - return nullptr; - } - }; - - /** - * @brief Unmaps memory by handle with multiple sequential mappings of the same handle. - * The multiple sequential mappings of the same handle are suppose to get the same - * result while there isn't a ref counter supported. - */ - void unlock(void* handle) noexcept override {} - - /** - * @brief Allocates memory - * @param size The size in bytes to allocate - * @return Handle to the allocated resource - */ - void* alloc(size_t size) noexcept override { - try { - auto td = InferenceEngine::TensorDesc(InferenceEngine::Precision::U8, InferenceEngine::SizeVector{size}, InferenceEngine::Layout::C); - InferenceEngine::ParamMap params = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(USM_HOST_BUFFER)}}; - _usm_host_blob = std::dynamic_pointer_cast(_context->CreateBlob(td, params)); - _usm_host_blob->allocate(); - if (!getBlobImpl(_usm_host_blob.get())->is_allocated()) { - return nullptr; - } - return _usm_host_blob->get(); - } catch (...) { - return nullptr; - } - } - - /** - * @brief Releases handle and all associated memory resources which invalidates the handle. - * @return false if handle cannot be released, otherwise - true. - */ - bool free(void* handle) noexcept override { - try { - _usm_host_blob = nullptr; - } catch(...) 
{ } - return true; - } -}; - - -class ExecutionContextImpl : public InferenceEngine::gpu::details::param_map_obj_getter { +class RemoteContextImpl { public: enum ContextType { OCL, DEV_SHARED }; - using Ptr = std::shared_ptr; - using CPtr = std::shared_ptr; + using Ptr = std::shared_ptr; + using CPtr = std::shared_ptr; - explicit ExecutionContextImpl(std::shared_ptr plugin, - const InferenceEngine::ParamMap& params, - const Config& config = {}); + RemoteContextImpl(std::string device_name, std::vector devices); + RemoteContextImpl(const std::vector& known_contexts, const InferenceEngine::ParamMap& params); - InferenceEngine::ParamMap getParams() const; - std::string getDeviceName() const noexcept; + InferenceEngine::ParamMap get_params() const; + std::string get_device_name() const noexcept; + InferenceEngine::MemoryBlob::Ptr create_host_blob(InferenceEngine::gpu::ClContext::Ptr public_context, const InferenceEngine::TensorDesc& desc); + InferenceEngine::RemoteBlob::Ptr create_blob(InferenceEngine::gpu::ClContext::Ptr public_context, + const InferenceEngine::TensorDesc& desc, + const InferenceEngine::ParamMap& params = {}); - std::shared_ptr GetEngine() const { return m_engine; } - Config& GetConfig() { return m_config; } - ContextType GetType() const { return m_type; } - InferenceEngine::gpu_handle_param GetExternalQueue() const { return m_external_queue; } - const std::weak_ptr GetPlugin() const { return m_plugin; } + cldnn::engine& get_engine() { return *m_engine; } + InferenceEngine::gpu_handle_param get_external_queue() const { return m_external_queue; } - void lock() { - while (m_lock.test_and_set(std::memory_order_acquire)) {} - } + cldnn::memory::ptr try_get_cached_memory(size_t hash); + void add_to_cache(size_t hash, cldnn::memory::ptr memory); - void unlock() { - m_lock.clear(std::memory_order_release); - } +private: + std::string get_device_name(const std::vector& known_contexts, + const cldnn::device::ptr current_device); + InferenceEngine::RemoteBlob::Ptr reuse_surface(InferenceEngine::gpu::ClContext::Ptr public_context, + const InferenceEngine::TensorDesc& desc, + const InferenceEngine::ParamMap& params); + InferenceEngine::RemoteBlob::Ptr reuse_memory(InferenceEngine::gpu::ClContext::Ptr public_context, + const InferenceEngine::TensorDesc& desc, + cldnn::shared_handle mem, + BlobType blob_type); + InferenceEngine::RemoteBlob::Ptr create_buffer(InferenceEngine::gpu::ClContext::Ptr public_context, const InferenceEngine::TensorDesc& desc); + InferenceEngine::RemoteBlob::Ptr create_usm(InferenceEngine::gpu::ClContext::Ptr public_context, + const InferenceEngine::TensorDesc& desc, + BlobType alloc_type); + void check_if_shared(); -protected: - // TODO: refactor to unique_ptr std::shared_ptr m_engine; InferenceEngine::gpu_handle_param m_va_display; InferenceEngine::gpu_handle_param m_external_queue; - Config m_config; + static const size_t cache_capacity = 100; ContextType m_type; - std::weak_ptr m_plugin; - std::atomic_flag m_lock; + std::string m_device_name = ""; + const std::string m_plugin_name; + cldnn::LruCache m_memory_cache; + std::mutex m_cache_mutex; }; -template -class TypedExecutionContext : public TpublicContextAPI { - template - struct _Key { - T1 _surf; - T2 _plane; - - _Key(T1 surf, T2 plane) : _surf(surf), _plane(plane) {} - - bool operator<(const _Key &that) const { - return _surf < that._surf || (_surf == that._surf && _plane < that._plane); - } - }; - -#ifdef _WIN32 - using surf_key = _Key; -#else - using surf_key = _Key; -#endif - std::map shared_surf_reg; - 
std::map shared_obj_reg; - - InferenceEngine::RemoteBlob::Ptr reuse_surf(const InferenceEngine::TensorDesc& tensorDesc, const InferenceEngine::ParamMap& params) { - using namespace InferenceEngine; - using InferenceEngine::gpu::details::param_map_obj_getter; - InferenceEngine::RemoteBlob::Ptr ret = nullptr; - auto& stream = _impl.GetEngine()->get_program_stream(); - uint32_t plane = param_map_obj_getter::_ObjFromParamSimple(params, GPU_PARAM_KEY(VA_PLANE)); -#ifdef _WIN32 - cldnn::shared_handle mem = param_map_obj_getter::_ObjFromParamSimple(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE)); - surf_key skey(mem, plane); -#else - cldnn::shared_surface surf = param_map_obj_getter::_ObjFromParamSimple(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE)); - surf_key skey(surf, plane); -#endif - std::lock_guard locker(_impl); - - // try to locate previously shared surface - auto itr = shared_surf_reg.find(skey); - if (itr != shared_surf_reg.end()) { - ret = itr->second; - } else { - // unlickily, not found - create new and insert into registry - cldnn::layout layout(DataTypeFromPrecision(tensorDesc.getPrecision()), - ImageFormatFromLayout(tensorDesc.getLayout()), - tensor_from_dims(tensorDesc.getDims())); - auto smart_this = - std::dynamic_pointer_cast(this->shared_from_this()); -#ifdef _WIN32 - ret = std::make_shared(smart_this, stream, - tensorDesc, layout, mem, 0, plane, - RemoteBlobImpl::BlobType::BT_SURF_SHARED); -#else - ret = std::make_shared(smart_this, stream, - tensorDesc, layout, nullptr, surf, plane, - RemoteBlobImpl::BlobType::BT_SURF_SHARED); -#endif - shared_surf_reg[skey] = ret; - } - - return ret; - } - - InferenceEngine::RemoteBlob::Ptr reuse_obj(const InferenceEngine::TensorDesc& tensorDesc, - cldnn::shared_handle mem, - RemoteBlobImpl::BlobType blob_type) { - InferenceEngine::RemoteBlob::Ptr ret = nullptr; - - std::lock_guard locker(_impl); - auto& stream = _impl.GetEngine()->get_program_stream(); - - // try to locate previously shared object - auto itr = shared_obj_reg.find(mem); - if (itr != shared_obj_reg.end()) { - ret = itr->second; - } else { - // unlickily, not found - create new and insert into registry - cldnn::layout layout(DataTypeFromPrecision(tensorDesc.getPrecision()), - FormatFromLayout(tensorDesc.getLayout()), - tensor_from_dims(tensorDesc.getDims())); - auto smart_this = - std::dynamic_pointer_cast(this->shared_from_this()); - - switch (blob_type) { - case RemoteBlobImpl::BlobType::BT_BUF_SHARED: - ret = std::make_shared(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type); - break; - case RemoteBlobImpl::BlobType::BT_USM_SHARED: - ret = std::make_shared(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type); - break; - case RemoteBlobImpl::BlobType::BT_IMG_SHARED: - layout.format = ImageFormatFromLayout(tensorDesc.getLayout()); - ret = std::make_shared(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type); - break; -#ifdef _WIN32 - case RemoteBlobImpl::BlobType::BT_DX_BUF_SHARED: - ret = std::make_shared(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type); - break; -#endif - default: - break; - } - shared_obj_reg[mem] = ret; - } - - return ret; - } - - InferenceEngine::RemoteBlob::Ptr create_buffer(const InferenceEngine::TensorDesc& tensorDesc) { - cldnn::layout layout(DataTypeFromPrecision(tensorDesc.getPrecision()), - FormatFromLayout(tensorDesc.getLayout()), - tensor_from_dims(tensorDesc.getDims())); - auto smart_this = std::dynamic_pointer_cast(this->shared_from_this()); - auto& stream = _impl.GetEngine()->get_program_stream(); - return 
std::make_shared(smart_this, - stream, - tensorDesc, - layout, - nullptr, 0, 0, - RemoteBlobImpl::BlobType::BT_BUF_INTERNAL); - } - - InferenceEngine::RemoteBlob::Ptr create_usm(const InferenceEngine::TensorDesc& tensorDesc, RemoteBlobImpl::BlobType alloc_type) { - cldnn::layout layout(DataTypeFromPrecision(tensorDesc.getPrecision()), - FormatFromLayout(tensorDesc.getLayout()), - tensor_from_dims(tensorDesc.getDims())); - auto smart_this = std::dynamic_pointer_cast(this->shared_from_this()); - auto& stream = _impl.GetEngine()->get_program_stream(); - - return std::make_shared(smart_this, - stream, - tensorDesc, - layout, - nullptr, 0, 0, - alloc_type); - } - - void check_if_shared() { - if (GetType() != ExecutionContextImpl::ContextType::DEV_SHARED) - IE_THROW() << "Shared context is required to to share this type of memory"; - } - +// Template class below is needed to allow proper cast of user contexts +// We have the following public classes hierarchy: +// RemoteContext +// | +// ClContext +// | | +// VAContext D3DContext +// So our implementation must allow casting of context object to proper type user type (ClContext, VAContext or D3DContext) +// Thus we introduce this template which have 3 instances with different base classes: +// RemoteContext +// | +// ---------- ClContext ----------- +// | | | +// VAContext | D3DContext +// | | | +// RemoteVAContext RemoteCLContext RemoteD3DContext +// +// All these context types are just thin wrappers that calls common context internal impl (RemoteContextImpl) +template +class TypedRemoteContext : public PublicContextType { public: - using Ptr = std::shared_ptr; - using CPtr = std::shared_ptr; + using Ptr = std::shared_ptr; - explicit TypedExecutionContext(std::shared_ptr plugin, - const InferenceEngine::ParamMap& params, - const Config& config = {}) - : _impl(plugin, params, config) {} + TypedRemoteContext(std::string device_name, std::vector devices) + : m_impl(std::make_shared(device_name, devices)) {} + TypedRemoteContext(const std::vector& known_contexts, const InferenceEngine::ParamMap& params) + : m_impl(std::make_shared(known_contexts, params)) {} - ~TypedExecutionContext() { - shared_surf_reg.clear(); - shared_obj_reg.clear(); + InferenceEngine::ParamMap getParams() const override { return m_impl->get_params(); } + std::string getDeviceName() const noexcept override { return m_impl->get_device_name(); } + InferenceEngine::MemoryBlob::Ptr CreateHostBlob(const InferenceEngine::TensorDesc& desc) override { + return m_impl->create_host_blob(std::dynamic_pointer_cast(this->shared_from_this()), desc); + } + InferenceEngine::RemoteBlob::Ptr CreateBlob(const InferenceEngine::TensorDesc& desc, const InferenceEngine::ParamMap& params = {}) override { + return m_impl->create_blob(std::dynamic_pointer_cast(this->shared_from_this()), desc, params); } - InferenceEngine::ParamMap getParams() const override { return _impl.getParams(); } - std::string getDeviceName() const noexcept override { return _impl.getDeviceName(); } + RemoteContextImpl::Ptr get_impl() { return m_impl; } - InferenceEngine::MemoryBlob::Ptr CreateHostBlob(const InferenceEngine::TensorDesc& tensorDesc) override { - if (_impl.GetEngine()->use_unified_shared_memory()) - return std::dynamic_pointer_cast(make_blob_with_precision(tensorDesc, std::make_shared(this))); - else - return std::dynamic_pointer_cast(make_blob_with_precision(tensorDesc)); - } - - InferenceEngine::RemoteBlob::Ptr CreateBlob(const InferenceEngine::TensorDesc& tensorDesc, const InferenceEngine::ParamMap& params = {}) 
override { - using namespace InferenceEngine; - using InferenceEngine::gpu::details::param_map_obj_getter; - if (params.empty()) { - // user wants plugin to allocate blob by itself and return handle - return create_buffer(tensorDesc); - } else { - // user will supply shared object handle - std::string memTypeStr = param_map_obj_getter::_StrFromParams(params, GPU_PARAM_KEY(SHARED_MEM_TYPE)); - - bool is_usm = memTypeStr == GPU_PARAM_VALUE(USM_HOST_BUFFER) || - memTypeStr == GPU_PARAM_VALUE(USM_DEVICE_BUFFER) || - memTypeStr == GPU_PARAM_VALUE(USM_USER_BUFFER); - - if (is_usm && !_impl.GetEngine()->use_unified_shared_memory()) { - IE_THROW(NotAllocated) << "Can't create USM tensor as USM is not supported (or manually disabled) on current device"; - } - - if (GPU_PARAM_VALUE(VA_SURFACE) == memTypeStr) { - check_if_shared(); - return reuse_surf(tensorDesc, params); - } else if (GPU_PARAM_VALUE(USM_HOST_BUFFER) == memTypeStr) { - return create_usm(tensorDesc, RemoteBlobImpl::BlobType::BT_USM_HOST_INTERNAL); - } else if (GPU_PARAM_VALUE(USM_DEVICE_BUFFER) == memTypeStr) { - return create_usm(tensorDesc, RemoteBlobImpl::BlobType::BT_USM_DEVICE_INTERNAL); - } else { - RemoteBlobImpl::BlobType blob_type; - cldnn::shared_handle mem = nullptr; - - if (GPU_PARAM_VALUE(OCL_BUFFER) == memTypeStr) { - blob_type = RemoteBlobImpl::BlobType::BT_BUF_SHARED; - mem = param_map_obj_getter::_ObjFromParamSimple(params, GPU_PARAM_KEY(MEM_HANDLE)); - } else if (GPU_PARAM_VALUE(USM_USER_BUFFER) == memTypeStr) { - blob_type = RemoteBlobImpl::BlobType::BT_USM_SHARED; - mem = param_map_obj_getter::_ObjFromParamSimple(params, GPU_PARAM_KEY(MEM_HANDLE)); - } else if (GPU_PARAM_VALUE(OCL_IMAGE2D) == memTypeStr) { - blob_type = RemoteBlobImpl::BlobType::BT_IMG_SHARED; - mem = param_map_obj_getter::_ObjFromParamSimple(params, GPU_PARAM_KEY(MEM_HANDLE)); -#ifdef _WIN32 - } else if (GPU_PARAM_VALUE(DX_BUFFER) == memTypeStr) { - blob_type = RemoteBlobImpl::BlobType::BT_DX_BUF_SHARED; - mem = param_map_obj_getter::_ObjFromParamSimple(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE)); - check_if_shared(); -#endif - } else { - IE_THROW() << "Unsupported shared object type " << memTypeStr; - } - - return reuse_obj(tensorDesc, mem, blob_type); - } - } - } - - Config& GetConfig() { return _impl.GetConfig(); } - ExecutionContextImpl::ContextType GetType() const { return _impl.GetType(); } - - ExecutionContextImpl* getImpl() { return &_impl; } - -protected: - ExecutionContextImpl _impl; +private: + std::shared_ptr m_impl; }; -using RemoteCLContext = TypedExecutionContext; +using RemoteCLContext = TypedRemoteContext; #ifdef _WIN32 -using RemoteD3DContext = TypedExecutionContext; +using RemoteD3DContext = TypedRemoteContext; #else -using RemoteVAContext = TypedExecutionContext; +using RemoteVAContext = TypedRemoteContext; #endif -inline ExecutionContextImpl* getContextImpl(InferenceEngine::gpu::ClContext::Ptr ctxPtr) { +inline std::shared_ptr get_context_impl(InferenceEngine::gpu::ClContext::Ptr context) { + OPENVINO_ASSERT(context != nullptr, "[GPU] Couldn't get impl from invalid context object"); #ifdef _WIN32 - { - auto ptr = ctxPtr->as(); - if (ptr) return ptr->getImpl(); - } + if (auto ptr = context->as()) + return ptr->get_impl(); #else - { - auto ptr = ctxPtr->as(); - if (ptr) return ptr->getImpl(); - } + if (auto ptr = context->as()) + return ptr->get_impl(); #endif - { - auto ptr = ctxPtr->as(); - if (ptr) return ptr->getImpl(); - } - return nullptr; + if (auto ptr = context->as()) + return ptr->get_impl(); + + OPENVINO_ASSERT(false, 
"[GPU] Couldn't get context impl from public context object."); +} + +inline std::shared_ptr get_context_impl(InferenceEngine::RemoteContext::Ptr context) { + OPENVINO_ASSERT(context != nullptr, "[GPU] Couldn't get impl from invalid context object"); + auto casted = std::dynamic_pointer_cast(context); + OPENVINO_ASSERT(casted != nullptr, "[GPU] Couldn't get context impl: Context type is not ClContext or it's derivatives"); + return get_context_impl(casted); } } // namespace intel_gpu diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/transformations_pipeline.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/transformations_pipeline.hpp index 7c10b444419..192044818dc 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/transformations_pipeline.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/transformations_pipeline.hpp @@ -8,19 +8,20 @@ #include -#include "intel_gpu/plugin/device_config.hpp" +#include "intel_gpu/runtime/execution_config.hpp" +#include "intel_gpu/runtime/device.hpp" namespace ov { namespace intel_gpu { class TransformationsPipeline { public: - explicit TransformationsPipeline(const Config &conf, const cldnn::device_info &device_info) + explicit TransformationsPipeline(const ExecutionConfig &conf, const cldnn::device_info &device_info) : config(conf), device_info(device_info) {} void apply(std::shared_ptr func); private: - Config config; + const ExecutionConfig& config; cldnn::device_info device_info; }; diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/variable_state.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/variable_state.hpp index e259aed2cca..ba8e0b5c366 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/variable_state.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/variable_state.hpp @@ -13,7 +13,7 @@ namespace intel_gpu { class VariableState : public InferenceEngine::IVariableStateInternal { public: VariableState(const std::string& name, const std::vector& states, - std::shared_ptr engine, int currentBatch); + cldnn::engine& engine, int currentBatch); /** * @brief Reset internal variable state for relevant infer request, to a value specified as @@ -41,7 +41,7 @@ private: int currentBatch_; std::vector states_; InferenceEngine::TensorDesc desc_; - std::shared_ptr engine_; + cldnn::engine& engine_; }; } // namespace intel_gpu diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/implementation_desc.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/implementation_desc.hpp index aba499f6cec..c9a17fbc03e 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/implementation_desc.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/implementation_desc.hpp @@ -49,6 +49,25 @@ inline std::ostream& operator<<(std::ostream& out, const impl_types& impl_type) return out; } +inline std::istream& operator>>(std::istream& is, impl_types& impl_type) { + std::string str; + is >> str; + if (str == "cpu") { + impl_type = impl_types::cpu; + } else if (str == "common") { + impl_type = impl_types::common; + } else if (str == "ocl") { + impl_type = impl_types::ocl; + } else if (str == "onednn") { + impl_type = impl_types::onednn; + } else if (str == "any") { + impl_type = impl_types::any; + } else { + throw ov::Exception{"Unsupported impl type: " + str}; + } + return is; +} + /// @brief Possible supported shape types. 
enum class shape_types : uint8_t { static_shape = 1 << 0, @@ -82,25 +101,35 @@ inline std::ostream& operator<<(std::ostream& out, const shape_types& shape_type return out; } -/// @brief Description of primitives implementation. -struct implementation_desc { - format::type output_format; ///< Output format. - std::string kernel_name; ///< GPU kernel name. - impl_types impl_type; ///< GPU implementation type. +} // namespace cldnn - implementation_desc() : - output_format(format::any), +namespace ov { +namespace intel_gpu { + +struct ImplementationDesc { + cldnn::format::type output_format; ///< Output format. + std::string kernel_name; ///< GPU kernel name. + cldnn::impl_types impl_type; ///< GPU implementation type. + + ImplementationDesc() : + output_format(cldnn::format::any), kernel_name(""), - impl_type(impl_types::any) {} + impl_type(cldnn::impl_types::any) {} - implementation_desc(format::type output_format, + ImplementationDesc(cldnn::format::type output_format, std::string kernel_name, - impl_types impl_type = impl_types::any) : + cldnn::impl_types impl_type = cldnn::impl_types::any) : output_format(output_format), kernel_name(kernel_name), impl_type(impl_type) {} }; -using implementation_forcing_map = std::map; +inline std::ostream& operator<<(std::ostream& out, const ImplementationDesc& desc) { + out << desc.impl_type << ":" << desc.kernel_name << ":" << desc.output_format; + return out; +} -} // namespace cldnn +using ImplForcingMap = std::map; + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp index 176ae7d4470..a966e5f20f6 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp @@ -5,11 +5,12 @@ #pragma once #include "device.hpp" -#include "engine_configuration.hpp" #include "event.hpp" #include "memory_caps.hpp" #include "memory_pool.hpp" #include "layout.hpp" +#include "execution_config.hpp" +#include "engine_configuration.hpp" #include #include @@ -91,9 +92,6 @@ public: /// Checks if the current engine supports speicied allocation @p type bool supports_allocation(allocation_type type) const; - /// Returns configuration of current engine - const engine_configuration& configuration() const { return _configuration; } - /// Returns device structure which represents stores device capabilities device_info get_device_info() const; @@ -129,22 +127,23 @@ public: uint64_t get_max_memory_size() const; /// Create stream object for current engine - virtual stream_ptr create_stream() const = 0; + virtual stream_ptr create_stream(const ExecutionConfig& config) const = 0; /// Creates stream object from user handle - virtual stream_ptr create_stream(void *handle) const = 0; + virtual stream_ptr create_stream(const ExecutionConfig& config, void *handle) const = 0; /// Returns service stream which can be used during program build and optimizations - virtual stream& get_program_stream() const = 0; + virtual stream& get_service_stream() const = 0; virtual allocation_type detect_usm_allocation_type(const void* memory) const = 0; #ifdef ENABLE_ONEDNN_FOR_GPU + /// Creates onednn engine object which shares device and context with current engine + virtual void create_onednn_engine(const ExecutionConfig& config) = 0; + /// Returns onednn engine object which shares device and context with current engine virtual dnnl::engine& get_onednn_engine() const = 0; #endif - /// Return GPU plugin internal 
task executor - const InferenceEngine::ITaskExecutor::Ptr get_task_executor(); /// Factory method which creates engine object with impl configured by @p engine_type /// @param engine_type requested engine type @@ -152,13 +151,7 @@ public: /// @param runtime_type requested execution runtime for the engine. @note some runtime/engine types configurations might be unsupported /// @param device specifies the device which the engine is created for /// @param configuration options for the engine - static std::shared_ptr create(engine_types engine_type, - runtime_types runtime_type, - const device::ptr device, - const engine_configuration& configuration = engine_configuration(), - const InferenceEngine::ITaskExecutor::Ptr task_executor = - std::make_shared( - InferenceEngine::CPUStreamsExecutor::Config())); + static std::shared_ptr create(engine_types engine_type, runtime_types runtime_type, const device::ptr device); /// Factory method which creates engine object with impl configured by @p engine_type /// @param engine_type requested engine type @@ -166,19 +159,12 @@ public: /// @param task_executor GPU plugin internal task executor /// @param configuration options for the engine /// @note engine is created for the first device returned by devices query - static std::shared_ptr create(engine_types engine_type, - runtime_types runtime_type, - const engine_configuration& configuration = engine_configuration(), - const InferenceEngine::ITaskExecutor::Ptr task_executor = - std::make_shared( - InferenceEngine::CPUStreamsExecutor::Config())); + static std::shared_ptr create(engine_types engine_type, runtime_types runtime_type); protected: /// Create engine for given @p device and @p configuration - engine(const device::ptr device, const engine_configuration& configuration, const InferenceEngine::ITaskExecutor::Ptr task_executor); - const InferenceEngine::ITaskExecutor::Ptr _task_executor; + engine(const device::ptr device); const device::ptr _device; - engine_configuration _configuration; mutable std::mutex _mutex; std::map> _memory_usage_map; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/engine_configuration.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/engine_configuration.hpp index c468899bad2..a6963c04b94 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/engine_configuration.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/engine_configuration.hpp @@ -13,101 +13,23 @@ namespace cldnn { -/// @addtogroup cpp_api C++ API -/// @{ - -/// @defgroup cpp_engine Execution Engine -/// @{ - /// @brief Defines available engine types enum class engine_types : int32_t { ocl, }; +inline std::ostream& operator<<(std::ostream& os, engine_types type) { + switch (type) { + case engine_types::ocl: os << "ocl"; break; + default: os << "unknown"; break; + } + + return os; +} + /// @brief Defines available runtime types enum class runtime_types : int32_t { ocl, }; -/// @brief Defines available priority mode types -enum class priority_mode_types : int16_t { - disabled, - low, - med, - high -}; - -/// @brief Defines available throttle mode types -enum class throttle_mode_types : int16_t { - disabled, - low, - med, - high -}; - -/// @brief Defines supported queue types -enum class queue_types : int16_t { - in_order, - out_of_order -}; - -/// @brief Configuration parameters for created engine. -struct engine_configuration { - const bool enable_profiling; ///< Enable per-primitive profiling. 
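// Illustrative sketch, not part of the original patch: with engine_configuration removed,
// engine and stream creation follows the signatures declared in engine.hpp above, roughly
// (assuming a previously queried cldnn::device::ptr named device):
//   auto engine = cldnn::engine::create(cldnn::engine_types::ocl, cldnn::runtime_types::ocl, device);
//   ov::intel_gpu::ExecutionConfig config;
//   auto stream = engine->create_stream(config);          // stream options come from ExecutionConfig
//   auto& build_stream = engine->get_service_stream();    // service stream used during program build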
- const queue_types queue_type; ///< Specifies type of queue used by the runtime - const std::string sources_dumps_dir; ///< Specifies a directory where sources of cldnn::program objects should be dumped. - ///< Empty by default (means no dumping). - const priority_mode_types priority_mode; ///< Priority mode (support of priority hints in command queue). If cl_khr_priority_hints extension - ///< is not supported by current OpenCL implementation, the value must be set to cldnn_priority_disabled. - - const throttle_mode_types throttle_mode; ///< Throttle mode (support of throttle hints in command queue). If cl_khr_throttle_hints extension - ///< is not supported by current OpenCL implementation, the value must be set to cldnn_throttle_disabled. - - bool use_memory_pool; ///< Enables memory usage optimization. memory objects will be reused when possible - ///< (switched off for older drivers then NEO). - bool use_unified_shared_memory; ///< Enables USM usage - const std::string kernels_cache_path; ///< Path to compiled kernels cache - uint16_t throughput_streams; ///< Number of queues/streams executed in parallel by GPU plugin - - const std::string tuning_cache_path; ///< Path to tuning kernel cache - - /// @brief Constructs engine configuration with specified options. - /// @param enable_profiling Enable per-primitive profiling. - /// @param queue_type Specifies type of queue used by the runtime - /// @param sources_dumps_dir Specifies a directory where sources of cldnn::program objects should be dumped - /// @param priority_mode Priority mode for all streams created within the engine - /// @param throttle_mode Throttle mode for all streams created within the engine - /// @param use_memory_pool Controls whether engine is allowed to reuse intermediate memory buffers whithin a network - /// @param use_unified_shared_memory If this option it true and device supports USM, then engine will use USM for all memory allocations - /// @param kernels_cache_path Path to existing directory where plugin can cache compiled kernels - /// @param n_threads Max number of host threads used in gpu plugin - /// @param throughput_streams Number of queues/streams executed in parallel by GPU plugin - /// @param tuning_cache_path Path to tuning kernel cache - engine_configuration( - bool enable_profiling = false, - queue_types queue_type = queue_types::out_of_order, - const std::string& sources_dumps_dir = std::string(), - priority_mode_types priority_mode = priority_mode_types::med, - throttle_mode_types throttle_mode = throttle_mode_types::med, - bool use_memory_pool = true, - bool use_unified_shared_memory = true, - const std::string& kernels_cache_path = "", - uint16_t throughput_streams = 1, - const std::string& tuning_cache_path = "cache.json") - : enable_profiling(enable_profiling) - , queue_type(queue_type) - , sources_dumps_dir(sources_dumps_dir) - , priority_mode(priority_mode) - , throttle_mode(throttle_mode) - , use_memory_pool(use_memory_pool) - , use_unified_shared_memory(use_unified_shared_memory) - , kernels_cache_path(kernels_cache_path) - , throughput_streams(throughput_streams) - , tuning_cache_path(tuning_cache_path) { } -}; - -/// @} - -/// @} - } // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp new file mode 100644 index 00000000000..93c3805fe5e --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -0,0 +1,162 @@ +// Copyright (C) 2022 
Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "intel_gpu/runtime/internal_properties.hpp" +#include "intel_gpu/runtime/device.hpp" + +namespace ov { +namespace intel_gpu { + +enum class PropertyVisibility { + INTERNAL = 0, + PUBLIC = 1 +}; + +inline std::ostream& operator<<(std::ostream& os, const PropertyVisibility& visibility) { + switch (visibility) { + case PropertyVisibility::PUBLIC: os << "PUBLIC"; break; + case PropertyVisibility::INTERNAL: os << "INTERNAL"; break; + default: os << "UNKNOWN"; break; + } + + return os; +} + +class BaseValidator { +public: + using Ptr = std::shared_ptr; + virtual ~BaseValidator() = default; + virtual bool is_valid(const ov::Any& v) const = 0; +}; + +class FuncValidator : public BaseValidator { +public: +explicit FuncValidator(std::function func) : m_func(func) { } + bool is_valid(const ov::Any& v) const override { + return m_func(v); + } + +private: + std::function m_func; +}; + +// PropertyTypeValidator ensures that value can be converted to given property type +template +class PropertyTypeValidator : public BaseValidator { +public: + bool is_valid(const ov::Any& v) const override { + try { + v.as(); + return true; + } catch (ov::Exception&) { + return false; + } + } +}; + +class ExecutionConfig { +public: + ExecutionConfig(); + ExecutionConfig(std::initializer_list values) : ExecutionConfig() { set_property(ov::AnyMap(values)); } + explicit ExecutionConfig(const ov::AnyMap& properties) : ExecutionConfig() { set_property(properties); } + explicit ExecutionConfig(const ov::AnyMap::value_type& property) : ExecutionConfig() { set_property(property); } + + void set_default(); + void set_property(const ov::AnyMap& properties); + void set_user_property(const ov::AnyMap& properties); + Any get_property(const std::string& name) const; + bool is_set_by_user(const std::string& name) const; + bool is_supported(const std::string& name) const; + void register_property_impl(const std::pair& propertiy, PropertyVisibility visibility, BaseValidator::Ptr validator); + + template ::type = true> + void register_property_impl() { } + + template + void register_property_impl(const std::tuple, ValueT>& property, PropertyInitializer&&... properties) { + auto p = std::get<0>(property)(std::get<1>(property)); + auto v = std::dynamic_pointer_cast(std::make_shared>()); + register_property_impl(std::move(p), visibility, std::move(v)); + register_property_impl(properties...); + } + + template + typename std::enable_if::value, void>::type + register_property_impl(const std::tuple, ValueT, ValidatorT>& property, PropertyInitializer&&... properties) { + auto p = std::get<0>(property)(std::get<1>(property)); + auto v = std::dynamic_pointer_cast(std::make_shared(std::get<2>(property))); + register_property_impl(std::move(p), visibility, std::move(v)); + register_property_impl(properties...); + } + + template + typename std::enable_if, ValidatorT>::value, void>::type + register_property_impl(const std::tuple, ValueT, ValidatorT>& property, PropertyInitializer&&... properties) { + auto p = std::get<0>(property)(std::get<1>(property)); + auto v = std::dynamic_pointer_cast(std::make_shared(std::get<2>(property))); + register_property_impl(std::move(p), visibility, std::move(v)); + register_property_impl(properties...); + } + + template + void register_property(PropertyInitializer&&... properties) { + register_property_impl(properties...); + } + + template + util::EnableIfAllStringAny set_property(Properties&&... 
properties) { + set_property(ov::AnyMap{std::forward(properties)...}); + } + + template + util::EnableIfAllStringAny set_user_property(Properties&&... properties) { + set_user_property(ov::AnyMap{std::forward(properties)...}); + } + + template + bool is_set_by_user(const ov::Property& property) const { + return is_set_by_user(property.name()); + } + + template + T get_property(const ov::Property& property) const { + return get_property(property.name()).template as(); + } + + void apply_user_properties(const cldnn::device_info& info); + + std::string to_string() const; + +protected: + void apply_hints(const cldnn::device_info& info); + void apply_performance_hints(const cldnn::device_info& info); + void apply_priority_hints(const cldnn::device_info& info); + void apply_debug_options(const cldnn::device_info& info); + +private: + ov::AnyMap internal_properties; + ov::AnyMap user_properties; + + std::map supported_properties; + std::map property_validators; +}; + +} // namespace intel_gpu +} // namespace ov + +namespace cldnn { +using ov::intel_gpu::ExecutionConfig; +} // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp new file mode 100644 index 00000000000..e8cd3cad55d --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp @@ -0,0 +1,99 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/runtime/properties.hpp" +#include "openvino/runtime/intel_gpu/properties.hpp" + +#include "intel_gpu/primitives/implementation_desc.hpp" +namespace ov { +namespace intel_gpu { + +/** + * @brief Read-only property to get GPU driver version + */ +static constexpr Property driver_version{"GPU_DRIVER_VERSION"}; + +/** + * @brief Read-only property to get GPU driver version + */ +static constexpr Property device_id{"GPU_DEVICE_ID"}; + +enum class QueueTypes : int16_t { + in_order, + out_of_order +}; + +inline std::ostream& operator<<(std::ostream& os, const QueueTypes& val) { + switch (val) { + case QueueTypes::in_order: os << "in-order"; break; + case QueueTypes::out_of_order: os << "out-of-order"; break; + default: os << "unknown"; + } + + return os; +} + +/** + * @brief Defines queue type that must be used for model execution + */ +static constexpr Property queue_type{"GPU_QUEUE_TYPE"}; + +static constexpr Property enable_memory_pool{"GPU_ENABLE_MEMORY_POOL"}; +static constexpr Property optimize_data{"GPU_OPTIMIZE_DATA"}; +static constexpr Property allow_static_input_reorder{"GPU_ALLOW_STATIC_INPUT_REORDER"}; +static constexpr Property partial_build_program{"GPU_PARTIAL_BUILD"}; +static constexpr Property allow_new_shape_infer{"GPU_ALLOW_NEW_SHAPE_INFER"}; +static constexpr Property dump_graphs{"GPU_DUMP_GRAPHS"}; +static constexpr Property, PropertyMutability::RW> custom_outputs{"GPU_CUSTOM_OUTPUTS"}; + +/// @brief Tuning mode. +enum class TuningMode { + /// @brief Tuning is disabled. + tuning_disabled, + + /// @brief Tuning using the cached data (no on-line tuning for non-existing data). + tuning_use_cache, + + /// @brief Tuning using the cached data if exist, tune and update cache otherwise. + tuning_tune_and_cache, + + /// @brief Tuning using the cached data and update tasks. + /// @details Performs updating tasks like removal of invalid caches, promoting to new format, etc. + /// No tuning for non-existing data. 
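// Illustrative sketch, not part of the original patch: typical use of the ExecutionConfig
// class declared in execution_config.hpp above (property names are the ones introduced by
// this patch; the device object is assumed to come from a prior device query):
//   ov::intel_gpu::ExecutionConfig config;
//   config.set_user_property(ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT));
//   config.set_property(ov::intel_gpu::optimize_data(true));        // plugin-internal property
//   config.apply_user_properties(device->get_info());               // resolve hints for the target device
//   bool optimize_data = config.get_property(ov::intel_gpu::optimize_data);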
+ tuning_use_and_update, + + /// @brief Retune the cache data even if it exists. + tuning_retune_and_cache +}; + +struct TuningConfig { + TuningMode mode; + std::string cache_file_path; + + TuningConfig() : mode(TuningMode::tuning_disabled), cache_file_path("") {} +}; + +inline std::ostream& operator<<(std::ostream& os, const TuningConfig& val) { + os << val.cache_file_path; + return os; +} + +static constexpr Property tuning_config{"GPU_TUNING_CONFIG"}; + +static constexpr Property force_implementations{"GPU_FORCE_IMPLEMENTATIONS"}; +static constexpr Property config_file{"CONFIG_FILE"}; +static constexpr Property enable_lp_transformations{"LP_TRANSFORMS_MODE"}; +static constexpr Property enable_dynamic_batch{"DYN_BATCH_ENABLED"}; +static constexpr Property max_dynamic_batch{"DYN_BATCH_LIMIT"}; +static constexpr Property exclusive_async_requests{"EXCLUSIVE_ASYNC_REQUESTS"}; +static constexpr Property nv12_two_inputs{"GPU_NV12_TWO_INPUTS"}; + +} // namespace intel_gpu +} // namespace ov + +namespace cldnn { +using ov::intel_gpu::QueueTypes; +} // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/lru_cache.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/lru_cache.hpp index d542af06d73..b091451b5a2 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/lru_cache.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/lru_cache.hpp @@ -9,6 +9,8 @@ #include #include +#include "kernel.hpp" + namespace cldnn { struct primitive_impl; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/stream.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/stream.hpp index 7f2a58f5d8d..505301901b3 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/stream.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/stream.hpp @@ -7,6 +7,7 @@ #include "event.hpp" #include "kernel.hpp" #include "kernel_args.hpp" +#include "execution_config.hpp" #include #include @@ -20,7 +21,7 @@ namespace cldnn { class stream { public: using ptr = std::shared_ptr; - explicit stream(queue_types queue_type) : queue_type(queue_type) {} + explicit stream(QueueTypes queue_type) : queue_type(queue_type) {} virtual ~stream() = default; virtual void flush() const = 0; @@ -39,16 +40,16 @@ public: virtual event::ptr create_user_event(bool set) = 0; virtual event::ptr create_base_event() = 0; - queue_types get_queue_type() const { return queue_type; } + QueueTypes get_queue_type() const { return queue_type; } - static queue_types detect_queue_type(engine_types engine_type, void* queue_handle); + static QueueTypes detect_queue_type(engine_types engine_type, void* queue_handle); #ifdef ENABLE_ONEDNN_FOR_GPU - virtual dnnl::stream& get_onednn_stream() const = 0; + virtual dnnl::stream& get_onednn_stream() = 0; #endif protected: - queue_types queue_type; + QueueTypes queue_type; }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/compilation_context.cpp b/src/plugins/intel_gpu/src/graph/compilation_context.cpp index e805800bc18..898f12431a0 100644 --- a/src/plugins/intel_gpu/src/graph/compilation_context.cpp +++ b/src/plugins/intel_gpu/src/graph/compilation_context.cpp @@ -12,8 +12,8 @@ class CompilationContext : public ICompilationContext { public: using compilation_queue_t = InferenceEngine::ThreadSafeQueue; - CompilationContext(cldnn::engine& engine, size_t program_id) { - _kernels_cache = cldnn::make_unique(engine, program_id, kernel_selector::KernelBase::get_db().get_batch_header_str()); + CompilationContext(cldnn::engine& engine, const ExecutionConfig& config, 
size_t program_id) { + _kernels_cache = cldnn::make_unique(engine, config, program_id, nullptr, kernel_selector::KernelBase::get_db().get_batch_header_str()); _worker = std::thread([this](){ while (!_stop_compilation) { CompilationContext::Task task; @@ -47,8 +47,8 @@ private: std::atomic_bool _stop_compilation{false}; }; -std::unique_ptr ICompilationContext::create(cldnn::engine& engine, size_t program_id) { - return cldnn::make_unique(engine, program_id); +std::unique_ptr ICompilationContext::create(cldnn::engine& engine, const ExecutionConfig& config, size_t program_id) { + return cldnn::make_unique(engine, config, program_id); } } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp index fcda0e6c057..48aa63479a2 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp @@ -46,7 +46,7 @@ void add_required_reorders::add_reorder(program& p, program_node* node, program_ } void add_required_reorders::run(program& p) { - bool optimize_data = p.get_options().get()->enabled(); + bool optimize_data = p.get_config().get_property(ov::intel_gpu::optimize_data); auto usr_itr = p.get_processing_order().begin(); while (usr_itr != p.get_processing_order().end()) { auto& usr = *usr_itr++; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp index caf7c36abc5..1c77bba4493 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp @@ -26,7 +26,7 @@ void compile_graph::run(program& p) { } } - auto task_executor = p.get_engine().get_task_executor(); + auto task_executor = p.get_task_executor(); auto& proc_order = p.get_processing_order(); std::vector tasks; std::exception_ptr exception; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp index 9b715f889d4..d2c3292f19c 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp @@ -400,9 +400,9 @@ void graph_initializations::handle_dynamic_lstm_node(program& p, lstm_dynamic_no } void graph_initializations::set_outputs(program& p) { - auto outputs_option = p.get_options().get(); - if (!outputs_option->outputs.empty()) { - for (auto const& output : outputs_option->outputs) { + auto custom_outputs = p.get_config().get_property(ov::intel_gpu::custom_outputs); + if (!custom_outputs.empty()) { + for (auto const& output : custom_outputs) { auto o_node = p.get_node_ptr(output); o_node->set_output(true); p.outputs.push_back(o_node.get()); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/pre_replace_deconv.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/pre_replace_deconv.cpp index d6802afb3d8..88c95a01256 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/pre_replace_deconv.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/pre_replace_deconv.cpp @@ -29,7 +29,7 @@ void pre_replace_deconv::run(program& p) { if (node->is_type()) { if (node->is_dynamic()) continue; - if (!p.get_options().get()->enabled()) + if (!p.get_config().get_property(ov::intel_gpu::optimize_data)) continue; auto& deconv_node = 
node->as(); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp index 2810d5cb45c..640b6c21c23 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp @@ -63,7 +63,7 @@ struct concat_in_place_optimization : pattern_match_optimization_typedis_output() && !get_program().is_debug_build()) || - !input.first->is_padding_supported(concat_axis, lower_padd_in_axis)) + if (input.first->is_output() || !input.first->is_padding_supported(concat_axis, lower_padd_in_axis)) return false; // TODO: Investigate if this condition is needed @@ -306,7 +305,6 @@ static bool can_reshape_be_optimized(const reshape_node& node) { // ToDo remove friendship relation from program_node void prepare_buffer_fusing::run(program& p) { - bool is_debug = p.get_options().get()->enabled(); /* We need to take care of proper ordering by types. 1. Concats @@ -348,10 +346,10 @@ void prepare_buffer_fusing::run(program& p) { if (!can_optimize(node)) continue; // zero copy - program_helpers::do_for_types(*node, [&p, is_debug](crop_node& node) { + program_helpers::do_for_types(*node, [&p](crop_node& node) { // if the node is marked as network output, prevent optimizations which would affect a form of its output, // unless debug flag is set - if (node.is_output() && !is_debug) + if (node.is_output()) return; // do not optimize when next node is concatenation which is not output diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index abbfec69189..c48355fd951 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -227,13 +227,12 @@ void prepare_primitive_fusing::fuse_reorders(program &p) { } void prepare_primitive_fusing::fuse_activations(program &p) { - bool is_debug = p.get_options().get()->enabled(); std::map>> fusing_history; bool use_onednn_impls = false; #ifdef ENABLE_ONEDNN_FOR_GPU auto& engine = p.get_engine(); - if (engine.get_device_info().supports_immad && engine.configuration().queue_type == queue_types::in_order) + if (engine.get_device_info().supports_immad && p.get_config().get_property(ov::intel_gpu::queue_type) == QueueTypes::in_order) use_onednn_impls = true; #endif @@ -242,7 +241,7 @@ void prepare_primitive_fusing::fuse_activations(program &p) { auto node_itr = itr++; auto& node = (*node_itr); - program_helpers::do_for_types(*node, [&p, &is_debug, &fusing_history, &use_onednn_impls](activation_node& node) { + program_helpers::do_for_types(*node, [&p, &fusing_history, &use_onednn_impls](activation_node& node) { auto& input = node.input(); auto id = node.id(); // Restrictions: @@ -251,7 +250,7 @@ void prepare_primitive_fusing::fuse_activations(program &p) { // - no activation additional input // - input was optimized // - can't have fused primitives - if (node.has_padded_dependency() || (input.is_output() && !is_debug) || node.is_output() || + if (node.has_padded_dependency() || input.is_output() || node.is_output() || node.get_dependencies().size() != 1 || input.can_be_optimized() || node.is_constant() || node.has_fused_primitives()) return; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp 
b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp index ee7fa17d9f5..03f72cc7886 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp @@ -24,7 +24,7 @@ void propagate_constants::run(program& p) { handle_constant(p, *node); } - auto&& to_replace = calculate(p.get_engine(), p.get_options()); + auto&& to_replace = calculate(p.get_engine(), p.get_config(), p.get_task_executor()); // remove all nodes which are no longer relevant, i.e. nodes which: // 1. are constants, and @@ -108,13 +108,16 @@ bool propagate_constants::has_non_const_user(program_node& node) const { return false; } -std::list> propagate_constants::calculate(engine& engine, build_options bo) { +std::list> propagate_constants::calculate(engine& engine, + const ExecutionConfig& config, + std::shared_ptr task_executor) { if (!has_non_trivial_constants) return {}; - bo.set_option(build_option::optimize_data(false)); - bo.set_option(build_option::outputs(const_outputs)); - network::ptr net = network::build_network(engine, nodes, bo, true); + ExecutionConfig cf_config = config; + cf_config.set_property(ov::intel_gpu::optimize_data(false)); + cf_config.set_property(ov::intel_gpu::custom_outputs(const_outputs)); + network::ptr net = network::build_network(engine, nodes, cf_config, task_executor, true); for (auto& cin : const_inputs) net->set_input_data(cin->id(), cin->get_attached_memory_ptr()); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp index 73a59a9844e..51a9e06450c 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp @@ -30,6 +30,7 @@ void select_preferred_formats::run(program& p) { return; #ifdef ENABLE_ONEDNN_FOR_GPU + engine.create_onednn_engine(p.get_config()); for (auto n : p.get_processing_order()) { // Onednn primitive descriptor creation may fail, for example, due to asymmetric weight. try { diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp index f2ec1123024..2ce94fe7dc2 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp @@ -75,10 +75,10 @@ public: uint32_t dilation_x = dilation.size() >= 1 ? 
dilation[dilation.size() - 1] : 1; params.dilation = {dilation_x, dilation_y, dilation_z}; - const auto& tuning_config = impl_param.get_program().get_options().get(); + const auto& tuning_config = impl_param.get_program().get_config().get_property(ov::intel_gpu::tuning_config); - if (tuning_config->config.mode == tuning_mode::tuning_tune_and_cache || - tuning_config->config.mode == tuning_mode::tuning_retune_and_cache) { + if (tuning_config.mode == ov::intel_gpu::TuningMode::tuning_tune_and_cache || + tuning_config.mode == ov::intel_gpu::TuningMode::tuning_retune_and_cache) { optional_params.tuningParams.runner = std::make_shared(impl_param.get_program().get_engine(), impl_param.get_program().get_id(), true); } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp index a8db3a7a7f5..78adc6e7fd6 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp @@ -166,10 +166,10 @@ public: auto& kernel_selector = kernel_selector::convolution_kernel_selector::Instance(); - const auto& tuning_config = arg.get_program().get_options().get(); + const auto& tuning_config = impl_param.get_program().get_config().get_property(ov::intel_gpu::tuning_config); - if (tuning_config->config.mode == tuning_mode::tuning_tune_and_cache || - tuning_config->config.mode == tuning_mode::tuning_retune_and_cache) { + if (tuning_config.mode == ov::intel_gpu::TuningMode::tuning_tune_and_cache || + tuning_config.mode == ov::intel_gpu::TuningMode::tuning_retune_and_cache) { conv_optional_params.tuningParams.runner = std::make_shared(arg.get_program().get_engine(), arg.get_program().get_id(), true, true); } diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp index 1f8598c6bab..a0b85b36bac 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp @@ -108,6 +108,7 @@ public: static std::unique_ptr create(const concatenation_node& arg, const kernel_impl_params& impl_params) { auto& engine = impl_params.prog->get_engine(); + auto& config = impl_params.prog->get_config(); if (arg.can_be_optimized()) return make_unique(engine); auto prim = impl_params.typed_desc(); @@ -116,7 +117,7 @@ public: std::shared_ptr dummy = nullptr; - return cldnn::make_unique(engine, dummy, attr, *desc); + return cldnn::make_unique(engine, config, dummy, attr, *desc); } }; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp index 8007edcc1cc..96ce8acbbaf 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp @@ -190,11 +190,12 @@ public: static std::unique_ptr create(const convolution_node& arg, const kernel_impl_params& impl_params) { auto& engine = impl_params.prog->get_engine(); + auto& config = impl_params.prog->get_config(); auto desc = get_convolution_descriptor(impl_params); auto attr = get_primitive_attributes(arg); dnnl::primitive_desc prim_desc{&desc->data, attr.get(), engine.get_onednn_engine(), nullptr}; - return cldnn::make_unique(engine, desc, attr, prim_desc, get_weights_reorder(impl_params, prim_desc, arg.get_transposed())); + return cldnn::make_unique(engine, config, desc, attr, prim_desc, 
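
The two OCL hunks above show the tuning options moving from a build_options entry to the single ov::intel_gpu::tuning_config property. A sketch of the read side; the field names mode and cache_file_path are taken from this patch, everything else is simplified:

    const auto& tuning = program.get_config().get_property(ov::intel_gpu::tuning_config);
    bool updates_cache = tuning.mode == ov::intel_gpu::TuningMode::tuning_tune_and_cache ||
                         tuning.mode == ov::intel_gpu::TuningMode::tuning_retune_and_cache;
    // tuning.cache_file_path is forwarded to the kernel selector in set_optional_params further below.
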
get_weights_reorder(impl_params, prim_desc, arg.get_transposed())); } }; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp index 69998f8f94e..ba52eb32d0f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp @@ -115,11 +115,12 @@ public: static std::unique_ptr create(const deconvolution_node& arg, const kernel_impl_params& impl_params) { auto& engine = impl_params.prog->get_engine(); + auto& config = impl_params.prog->get_config(); auto desc = get_deconvolution_descriptor(impl_params); auto attr = get_primitive_attributes(arg); dnnl::primitive_desc prim_desc{&desc->data, attr.get(), engine.get_onednn_engine(), nullptr}; - return cldnn::make_unique(engine, desc, attr, prim_desc, get_weights_reorder(impl_params, prim_desc)); + return cldnn::make_unique(engine, config, desc, attr, prim_desc, get_weights_reorder(impl_params, prim_desc)); } }; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp index 8042e8e3b23..d29102f787d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp @@ -177,11 +177,12 @@ public: static std::unique_ptr create(const fully_connected_node& arg, const kernel_impl_params& impl_params) { auto& engine = impl_params.prog->get_engine(); + auto& config = impl_params.prog->get_config(); auto desc = get_fully_connected_descriptor(impl_params); auto attr = arg.get_onednn_primitive_attributes(); dnnl::primitive_desc prim_desc{&desc->data, attr.get(), engine.get_onednn_engine(), nullptr}; - return cldnn::make_unique(engine, desc, attr, prim_desc, get_weights_reorder(impl_params, prim_desc)); + return cldnn::make_unique(engine, config, desc, attr, prim_desc, get_weights_reorder(impl_params, prim_desc)); } }; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp index 736e38f235a..23e911dc7c2 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp @@ -158,11 +158,12 @@ public: static std::unique_ptr create(const gemm_node& arg, const kernel_impl_params& impl_params) { auto& engine = impl_params.prog->get_engine(); + auto& config = impl_params.prog->get_config(); auto desc = get_gemm_descriptor(impl_params); auto attr = arg.get_onednn_primitive_attributes(); dnnl::primitive_desc prim_desc{&desc->data, attr.get(), engine.get_onednn_engine(), nullptr}; - return cldnn::make_unique(engine, desc, attr, prim_desc); + return cldnn::make_unique(engine, config, desc, attr, prim_desc); } }; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.cpp index fc8681dc52a..c7088692d4e 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.cpp @@ -102,11 +102,12 @@ public: static std::unique_ptr create(const pooling_node& arg, const kernel_impl_params& impl_params) { auto& engine = impl_params.prog->get_engine(); + auto& config = impl_params.prog->get_config(); auto desc = get_pooling_descriptor(impl_params); auto attr = 
arg.get_onednn_primitive_attributes(); dnnl::primitive_desc prim_desc{&desc->data, attr.get(), engine.get_onednn_engine(), nullptr}; - return cldnn::make_unique(engine, desc, attr, prim_desc); + return cldnn::make_unique(engine, config, desc, attr, prim_desc); } }; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h index 617ea6ee1d8..6f33466ca2d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h @@ -40,6 +40,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { std::unordered_map> _args; typed_primitive_onednn_impl(const engine& engine, + const ExecutionConfig& config, std::shared_ptr desc, std::shared_ptr attrs, const PrimDescType& pd, @@ -49,7 +50,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { _desc(desc), _attrs(attrs), _pd(pd) { - build_primitive(); + build_primitive(config); } typed_primitive_onednn_impl(const engine& engine) @@ -362,8 +363,8 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { } private: - std::string get_cache_directory() const { - auto path = _engine->configuration().kernels_cache_path; + std::string get_cache_directory(const ExecutionConfig& config) const { + auto path = config.get_property(ov::cache_dir); if (path.empty()) { return {}; } @@ -374,8 +375,8 @@ private: return path; } - std::string generate_cache_path_from_key(std::vector key) const { - auto path = get_cache_directory(); + std::string generate_cache_path_from_key(const ExecutionConfig& config, std::vector key) const { + auto path = get_cache_directory(config); if (path.empty()) { return {}; } @@ -385,8 +386,8 @@ private: return path + std::to_string(hash) + ".onednn.cl_cache"; } - void build_primitive() { - auto cache_outpath = get_cache_directory(); + void build_primitive(const ExecutionConfig& config) { + auto cache_outpath = get_cache_directory(config); if (const char* env_p = std::getenv("OV_GPU_CACHE_MODEL")) { if (env_p[0] == '1') { @@ -403,7 +404,7 @@ private: std::vector cache; { std::lock_guard lock(cacheAccessMutex); - cache = ov::util::load_binary(generate_cache_path_from_key(key)); + cache = ov::util::load_binary(generate_cache_path_from_key(config, key)); } if (cache.empty()) { @@ -412,7 +413,7 @@ private: { std::lock_guard lock(cacheAccessMutex); - ov::util::save_binary(generate_cache_path_from_key(key), cache); + ov::util::save_binary(generate_cache_path_from_key(config, key), cache); } } else { _prim = PrimType(_pd, cache); @@ -563,9 +564,8 @@ protected: event::ptr execute_impl(const std::vector& /* events */, typed_primitive_inst& instance) override { auto& network = instance.get_network(); - auto& engine = network.get_engine(); auto& stream = network.get_stream(); - auto profiling = engine.configuration().enable_profiling; + auto profiling = network.get_config().get_property(ov::enable_profiling); auto net_id = network.get_id(); event::ptr event; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/reduction_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/reduction_onednn.cpp index 341a2fdb786..32c11e2b406 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/reduction_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/reduction_onednn.cpp @@ -118,11 +118,12 @@ public: static std::unique_ptr create(const reduce_node& arg, const kernel_impl_params& impl_params) { auto& engine = 
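
In primitive_onednn_base.h above, the oneDNN binary cache directory now comes from the standard ov::cache_dir property rather than the engine configuration. A self-contained sketch of the path derivation, assuming the same std::hash-based naming as generate_cache_path_from_key (the real code hashes the serialized primitive descriptor key):

    #include <functional>
    #include <string>
    #include <vector>

    std::string onednn_cache_path(const std::string& cache_dir, const std::vector<unsigned char>& key) {
        if (cache_dir.empty())
            return {};  // caching is effectively disabled when ov::cache_dir is not set
        size_t hash = std::hash<std::string>()(std::string(key.begin(), key.end()));
        return cache_dir + "/" + std::to_string(hash) + ".onednn.cl_cache";
    }
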
impl_params.prog->get_engine(); + auto& config = impl_params.prog->get_config(); auto desc = get_reduction_descriptor(impl_params); auto attr = arg.get_onednn_primitive_attributes(); dnnl::primitive_desc prim_desc{&desc->data, attr.get(), engine.get_onednn_engine(), nullptr}; - return cldnn::make_unique(engine, desc, attr, prim_desc); + return cldnn::make_unique(engine, config, desc, attr, prim_desc); } }; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp index 14d5b782bed..26a96abdd07 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp @@ -86,12 +86,13 @@ public: static std::unique_ptr create(const reorder_node& arg, const kernel_impl_params& impl_params) { auto& engine = impl_params.prog->get_engine(); + auto& config = impl_params.prog->get_config(); auto attr = arg.get_onednn_primitive_attributes(); auto desc = get_reorder_descriptor(impl_params, *attr, impl_params.prog->get_engine()); std::shared_ptr dummy = nullptr; - return cldnn::make_unique(engine, dummy, attr, *desc); + return cldnn::make_unique(engine, config, dummy, attr, *desc); } }; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp index cdeddcd17c3..16ac5d767cf 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp @@ -113,7 +113,7 @@ dnnl::memory::desc create_memory_desc_from_format_string(dnnl::memory::dims dims template cldnn::memory::ptr convert_zp_data_to_s32(const memory::ptr zp_memory) { auto engine = zp_memory->get_engine(); - auto& stream = engine->get_program_stream(); + auto& stream = engine->get_service_stream(); auto zp_s32_layout = zp_memory->get_layout(); zp_s32_layout.data_type = data_types::i32; @@ -493,7 +493,7 @@ template bool is_per_tensor(cldnn::data_node& node, int32_t& zp_val) { auto ptr = node.get_attached_memory_ptr(); auto engine = ptr->get_engine(); - auto& stream = engine->get_program_stream(); + auto& stream = engine->get_service_stream(); auto num_elems = node.get_output_layout().count(); mem_lock old_data {ptr, stream}; auto val = old_data[0]; diff --git a/src/plugins/intel_gpu/src/graph/include/compilation_context.hpp b/src/plugins/intel_gpu/src/graph/include/compilation_context.hpp index bb8263ab862..35d23317d5b 100644 --- a/src/plugins/intel_gpu/src/graph/include/compilation_context.hpp +++ b/src/plugins/intel_gpu/src/graph/include/compilation_context.hpp @@ -17,7 +17,7 @@ public: virtual void cancel() noexcept = 0; virtual ~ICompilationContext() = default; - static std::unique_ptr create(cldnn::engine& engine, size_t program_id); + static std::unique_ptr create(cldnn::engine& engine, const ExecutionConfig& config, size_t program_id); }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/include/condition_inst.h b/src/plugins/intel_gpu/src/graph/include/condition_inst.h index 48ef812478f..637c0e21b4c 100644 --- a/src/plugins/intel_gpu/src/graph/include/condition_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/condition_inst.h @@ -26,7 +26,7 @@ private: add_or_change_input_layout(node); _program = program::build_program(node.get_program().get_engine(), _topology, - node.get_program().get_options(), + node.get_program().get_config(), true); // rebuild program } program::ptr get() const { return _program; } diff --git 
a/src/plugins/intel_gpu/src/graph/include/kernel_selector_helper.h b/src/plugins/intel_gpu/src/graph/include/kernel_selector_helper.h index 87235d08749..f197d77d883 100644 --- a/src/plugins/intel_gpu/src/graph/include/kernel_selector_helper.h +++ b/src/plugins/intel_gpu/src/graph/include/kernel_selector_helper.h @@ -101,7 +101,7 @@ kernel_selector::data_layout to_data_layout(format f); cldnn::format from_data_layout(kernel_selector::data_layout l); kernel_selector::weights_layout to_weights_layout(format f, bool is_grouped); cldnn::format::type from_weights_layout(kernel_selector::weights_layout l); -kernel_selector::tuning_mode to_tuning_mode(cldnn::tuning_mode mode); +kernel_selector::tuning_mode to_tuning_mode(ov::intel_gpu::TuningMode mode); kernel_selector::data_tensor convert_data_tensor(const layout& l, const tensor view_offset = tensor {}); kernel_selector::weights_tensor convert_weights_tensor(const layout& l, bool is_grouped = false); layout from_weights_tensor(const kernel_selector::weights_tensor& t); diff --git a/src/plugins/intel_gpu/src/graph/include/layout_optimizer.h b/src/plugins/intel_gpu/src/graph/include/layout_optimizer.h index 9122db295ec..860db03d862 100644 --- a/src/plugins/intel_gpu/src/graph/include/layout_optimizer.h +++ b/src/plugins/intel_gpu/src/graph/include/layout_optimizer.h @@ -199,7 +199,7 @@ public: void set_optimization_attribute(optimization_attributes_type attribute, int32_t val); optimization_attributes get_optimization_attributes() { return _optimization_attributes; } - void set_implementation_forcing(const implementation_forcing_map& map); + void set_implementation_forcing(const ov::intel_gpu::ImplForcingMap& map); void update_formats_map(const convolution_node& node); bool is_format_optimized(const convolution_node& node, const format& format, bool use_weak_restrictions = false); diff --git a/src/plugins/intel_gpu/src/graph/include/loop_inst.h b/src/plugins/intel_gpu/src/graph/include/loop_inst.h index 8795d4d7058..091ebe3fd8f 100644 --- a/src/plugins/intel_gpu/src/graph/include/loop_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/loop_inst.h @@ -311,10 +311,10 @@ public: output_names.insert(get_condition_id()); } - auto opts = get_program().get_options(); std::vector output_names_vec(output_names.begin(), output_names.end()); - opts.set_option(build_option::outputs(output_names_vec)); - body_program = program::build_program(get_program().get_engine(), body, opts, false, false, true); + auto config = get_program().get_config(); + config.set_property(ov::intel_gpu::custom_outputs(output_names_vec)); + body_program = program::build_program(get_program().get_engine(), body, config, false, false, true); } const primitive_id& get_trip_count_id() const { return get_primitive()->trip_count_id; } diff --git a/src/plugins/intel_gpu/src/graph/include/pass_manager.h b/src/plugins/intel_gpu/src/graph/include/pass_manager.h index fec91bdb680..db55e105742 100644 --- a/src/plugins/intel_gpu/src/graph/include/pass_manager.h +++ b/src/plugins/intel_gpu/src/graph/include/pass_manager.h @@ -276,7 +276,9 @@ public: private: void run(program& p) override; - std::list> calculate(engine& engine, build_options bo); + std::list> calculate(engine& engine, + const ExecutionConfig& config, + std::shared_ptr task_executor); bool has_non_const_user(program_node& node) const; void handle_constant(program& prog, program_node& node); void add_constant(program& prog, program_node& node); diff --git a/src/plugins/intel_gpu/src/graph/include/program_dump_graph.h 
b/src/plugins/intel_gpu/src/graph/include/program_dump_graph.h index 8e28be96927..d64f85c3199 100644 --- a/src/plugins/intel_gpu/src/graph/include/program_dump_graph.h +++ b/src/plugins/intel_gpu/src/graph/include/program_dump_graph.h @@ -8,8 +8,7 @@ #include namespace cldnn { -std::string get_dir_path(build_options); -std::string get_serialization_network_name(build_options); +std::string get_dir_path(const ExecutionConfig& config); void dump_graph_optimized(std::ofstream&, const program&); void dump_graph_processing_order(std::ofstream&, const program&); diff --git a/src/plugins/intel_gpu/src/graph/kernel_runner.cpp b/src/plugins/intel_gpu/src/graph/kernel_runner.cpp index 361bec28129..55507da5604 100644 --- a/src/plugins/intel_gpu/src/graph/kernel_runner.cpp +++ b/src/plugins/intel_gpu/src/graph/kernel_runner.cpp @@ -162,7 +162,7 @@ void kernel_runner::prepare_kernel_args(const kernel_selector::KernelsData& kern std::vector kernel_runner::run_kernels(const kernel_selector::KernelsData& kernels_data) { std::vector run_times; - stream::ptr stream = _engine.create_stream(); + stream::ptr stream = _engine.create_stream({}); int num_of_kernels_to_run = static_cast(kernels_data.size()); int num_of_kernels_run = 0; @@ -174,7 +174,7 @@ std::vector kernel_runner::run_kernels(const kernel_se batch_end = batch_start + current_compilation_batch; std::vector kernels; - kernels_cache cache(_engine, program_id); + kernels_cache cache(_engine, {}, program_id); for (auto it = batch_start; it < batch_end; it++) { auto kernel_id = cache.set_kernel_source(it->kernels[0].code.kernelString, false); diff --git a/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp b/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp index 8b48ee1943a..50d6a2244b6 100644 --- a/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp +++ b/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp @@ -819,17 +819,17 @@ cldnn::format::type from_weights_layout(kernel_selector::weights_layout l) { } } -kernel_selector::tuning_mode to_tuning_mode(cldnn::tuning_mode mode) { +kernel_selector::tuning_mode to_tuning_mode(ov::intel_gpu::TuningMode mode) { switch (mode) { - case cldnn::tuning_mode::tuning_disabled: + case ov::intel_gpu::TuningMode::tuning_disabled: return kernel_selector::tuning_mode::TUNING_DISABLED; - case cldnn::tuning_mode::tuning_use_cache: + case ov::intel_gpu::TuningMode::tuning_use_cache: return kernel_selector::tuning_mode::TUNING_USE_CACHE; - case cldnn::tuning_mode::tuning_tune_and_cache: + case ov::intel_gpu::TuningMode::tuning_tune_and_cache: return kernel_selector::tuning_mode::TUNING_TUNE_AND_CACHE; - case cldnn::tuning_mode::tuning_use_and_update: + case ov::intel_gpu::TuningMode::tuning_use_and_update: return kernel_selector::tuning_mode::TUNING_USE_AND_UPDATE; - case cldnn::tuning_mode::tuning_retune_and_cache: + case ov::intel_gpu::TuningMode::tuning_retune_and_cache: return kernel_selector::tuning_mode::TUNING_RETUNE_AND_CACHE; default: return kernel_selector::tuning_mode::TUNING_DISABLED; @@ -1041,8 +1041,7 @@ void set_params(const kernel_impl_params& param_info, kernel_selector::params& p params.engineInfo.supportedSimdSizes = device_info.supported_simd_sizes; params.engineInfo.vendor_id = device_info.vendor_id; - auto impl_forcing_bo = program->get_options().get(); - const auto& impl_forcing = impl_forcing_bo->forcing; + auto impl_forcing = program->get_config().get_property(ov::intel_gpu::force_implementations); if (impl_forcing.count(param_info.desc->id) != 0) { 
params.forceImplementation = impl_forcing.at(param_info.desc->id).kernel_name; @@ -1051,14 +1050,14 @@ void set_params(const kernel_impl_params& param_info, kernel_selector::params& p void set_optional_params(const program& program, kernel_selector::optional_params& params) { params.meaningfulKernelsNames = false; - params.allowStaticInputReordering = program.get_options().get()->enabled() || - program.get_options().get()->enabled(); + params.allowStaticInputReordering = program.get_config().get_property(ov::intel_gpu::optimize_data) || + program.get_config().get_property(ov::intel_gpu::allow_static_input_reorder); params.allowInputReordering = false; params.allowOutputReordering = false; - const auto& tuning_config = program.get_options().get(); - params.tuningParams.mode = to_tuning_mode(tuning_config->config.mode); - params.tuningParams.cacheFilePath = tuning_config->config.cache_file_path; + const auto& tuning_config = program.get_config().get_property(ov::intel_gpu::tuning_config); + params.tuningParams.mode = to_tuning_mode(tuning_config.mode); + params.tuningParams.cacheFilePath = tuning_config.cache_file_path; } void kernel_impl_params::save(BinaryOutputBuffer& ob) const { diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index 17f44b9e188..02d46282fa9 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -1416,7 +1416,7 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format const size_t kBatchNum = scores_layout.batch(); const size_t kClassNum = scores_layout.feature(); const size_t kNStreams = - static_cast(node.get_program().get_engine().configuration().throughput_streams); + static_cast(node.get_program().get_config().get_property(ov::streams::num)); const size_t kKeyValue = kBatchNum * std::min(kClassNum, static_cast(8)) * kNStreams; preferred_impl = (kKeyValue > 64) ? impl_types::ocl : impl_types::cpu; } @@ -1668,7 +1668,7 @@ format layout_optimizer::get_preferred_format(program_node& node) { auto output_layout = node.get_output_layout(); bool use_onednn_impls = _optimization_attributes.use_onednn_impls; - bool allow_new_shape_infer = node.get_program().get_options().get()->enabled(); + bool allow_new_shape_infer = node.get_program().get_config().get_property(ov::intel_gpu::allow_new_shape_infer); if (allow_new_shape_infer) { if (node.is_type()) @@ -2013,7 +2013,7 @@ bool layout_optimizer::is_format_optimized(const deconvolution_node& node, const } } -void layout_optimizer::set_implementation_forcing(const implementation_forcing_map& map) { +void layout_optimizer::set_implementation_forcing(const ov::intel_gpu::ImplForcingMap& map) { for (const auto& kv : map) { _forcing_map.emplace(kv.first, std::make_pair(kv.second.output_format, kv.second.impl_type)); } diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index ad2baec6a1c..415c0ca48be 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -277,8 +277,9 @@ static uint32_t get_unique_net_id() { Network will always have net_id = 0 when it will be cldnn internal micronetwork (created i.e by propagate_constants opt pass). 
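
The set_params and layout_optimizer hunks above both consume the forced-implementation map through the new ov::intel_gpu::force_implementations property. A sketch of the lookup; the field names kernel_name, output_format and impl_type appear in this patch, while node_id and config are assumed locals:

    const auto& forcing = config.get_property(ov::intel_gpu::force_implementations);
    auto it = forcing.find(node_id);
    if (it != forcing.end()) {
        // it->second.kernel_name   -> passed to the kernel selector as forceImplementation
        // it->second.output_format -> preferred output format for the node
        // it->second.impl_type     -> ocl / onednn / cpu choice
    }
    // program::set_options() later in this patch enables optimize_data automatically
    // whenever this map is non-empty.
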
*/ -network::network(program::ptr program, stream::ptr stream, bool is_internal, bool is_primary_stream) +network::network(program::ptr program, const ExecutionConfig& config, stream::ptr stream, bool is_internal, bool is_primary_stream) : _program(program) + , _config(config) , _engine(program->get_engine()) , _stream(stream) , _memory_pool(new memory_pool(program->get_engine())) @@ -304,34 +305,42 @@ network::network(program::ptr program, stream::ptr stream, bool is_internal, boo if (is_dynamic()) { GPU_DEBUG_DEFINE_MEM_LOGGER("dynamic_network_initialization"); - _kernels_cache = std::unique_ptr(new kernels_cache(program->get_engine(), program->get_id(), - kernel_selector::KernelBase::get_db().get_batch_header_str())); + _kernels_cache = std::unique_ptr(new kernels_cache(program->get_engine(), + program->get_config(), + program->get_id(), + program->get_task_executor(), + kernel_selector::KernelBase::get_db().get_batch_header_str())); _impls_cache = std::unique_ptr(new ImplementationsCache(_impls_cache_capacity)); _in_mem_kernels_cache = std::unique_ptr(new KernelsCache(_in_mem_kernels_cache_capacity)); - _compilation_context = std::move(ICompilationContext::create(program->get_engine(), program->get_id())); + _compilation_context = std::move(ICompilationContext::create(program->get_engine(), program->get_config(), program->get_id())); } } network::network(engine& engine, const topology& topo, - const build_options& options, + const ExecutionConfig& config, bool is_internal) - : network(program::build_program(engine, topo, options, is_internal), engine.create_stream(), is_internal) {} + : network(program::build_program(engine, topo, config, is_internal), config, engine.create_stream(config), is_internal) {} network::network(engine& engine, const std::set>& nodes, - const build_options& options, + const ExecutionConfig& config, + std::shared_ptr task_executor, bool is_internal) - : network(program::build_program(engine, nodes, options, is_internal), engine.create_stream(), is_internal) {} + : network(program::build_program(engine, nodes, config, task_executor, is_internal), config, engine.create_stream(config), is_internal) {} network::network(program::ptr program, uint16_t stream_id) - : network(program, program->get_engine().create_stream(), false, stream_id == 0) {} + : network(program, program->get_config(), program->get_engine().create_stream(program->get_config()), false, stream_id == 0) {} network::network(program::ptr program, stream::ptr stream, uint16_t stream_id) - : network(program, stream, false, stream_id == 0) {} + : network(program, program->get_config(), stream, false, stream_id == 0) {} network::network(cldnn::BinaryInputBuffer& ib, stream::ptr stream, engine& engine, uint16_t stream_id) + : network(ib, ExecutionConfig{}, stream, engine, stream_id) {} + +network::network(cldnn::BinaryInputBuffer& ib, const ExecutionConfig& config, stream::ptr stream, engine& engine, uint16_t stream_id) : _program(nullptr) + , _config(config) , _engine(engine) , _stream(stream) , _memory_pool(new memory_pool(engine)) @@ -340,7 +349,7 @@ network::network(cldnn::BinaryInputBuffer& ib, stream::ptr stream, engine& engin , _reset_arguments(true) { net_id = get_unique_net_id(); - kernels_cache kernels_cache(get_engine(), 0, {""}); + kernels_cache kernels_cache(get_engine(), config, 0, nullptr, {""}); ib >> kernels_cache; int num_data_nodes; @@ -442,7 +451,7 @@ network::~network() { // [ executable primitive_inst ] // [ memory reuse information ] void network::save(cldnn::BinaryOutputBuffer& ob) 
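
Throughout the network changes above, streams are created from the ExecutionConfig (engine.create_stream(config)), so queue-related options no longer live on the engine. A short sketch; the property name comes from the patch, but setting it directly like this is an assumption:

    ExecutionConfig config;
    config.set_property(ov::intel_gpu::queue_type(cldnn::QueueTypes::out_of_order));  // namespace of QueueTypes assumed
    auto stream = engine.create_stream(config);  // queue type, priority and throttle are read from config
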
{ - kernels_cache kernels_cache(get_engine(), 0, {""}); + kernels_cache kernels_cache(get_engine(), _config, 0, nullptr, {""}); for (const auto& p_inst : _exec_order) { if (p_inst->get_impl() != nullptr) kernels_cache.add_kernels(p_inst->get_impl()->get_kernel_ids(), p_inst->get_impl()->get_kernels()); @@ -505,26 +514,27 @@ void network::save(cldnn::BinaryOutputBuffer& ob) { } network::ptr network::allocate_network(stream::ptr stream, program::ptr program, bool is_internal, bool is_primary_stream) { - return std::make_shared(program, stream, is_internal, is_primary_stream); + return std::make_shared(program, program->get_config(), stream, is_internal, is_primary_stream); } network::ptr network::allocate_network(engine& engine, program::ptr program, bool is_internal, bool is_primary_stream) { - auto stream = engine.create_stream(); - return std::make_shared(program, stream, is_internal, is_primary_stream); + auto stream = engine.create_stream(program->get_config()); + return std::make_shared(program, program->get_config(), stream, is_internal, is_primary_stream); } network::ptr network::build_network(engine& engine, const topology& topology, - const build_options& options, + const ExecutionConfig& config, bool is_internal) { - return std::make_shared(engine, topology, options, is_internal); + return std::make_shared(engine, topology, config, is_internal); } network::ptr network::build_network(engine& engine, - const std::set>& nodes, - const build_options& options, - bool is_internal) { - return std::make_shared(engine, nodes, options, is_internal); + const std::set>& nodes, + const ExecutionConfig& config, + std::shared_ptr task_executor, + bool is_internal) { + return std::make_shared(engine, nodes, config, task_executor, is_internal); } void network::validate_primitives() { @@ -963,8 +973,7 @@ void network::execute_impl(const std::vector& events) { } // Store events only in case of OOO queue or enabled Profiling - auto store_events = get_stream().get_queue_type() == queue_types::out_of_order || - get_engine().configuration().enable_profiling; + auto store_events = get_stream().get_queue_type() == QueueTypes::out_of_order || _config.get_property(ov::enable_profiling); if (store_events) { if (_program != nullptr) { for (auto& inst : _program->get_processing_order()) { @@ -1113,8 +1122,8 @@ void network::execute_primitive(const std::shared_ptr& primitive event::ptr ev = primitive->execute(events); // Collect events only for OOO queue and Profiling mode - if (get_stream().get_queue_type() == queue_types::out_of_order || - get_engine().configuration().enable_profiling) { + if (get_stream().get_queue_type() == QueueTypes::out_of_order || + get_config().get_property(ov::enable_profiling)) { auto id = primitive->id(); _events.insert({id, ev}); } @@ -1203,7 +1212,7 @@ memory::ptr network::get_memory_from_pool(const layout& layout, std::set dependencies, allocation_type type, bool reusable) { - if (get_engine().configuration().use_memory_pool) + if (_config.get_property(ov::intel_gpu::enable_memory_pool)) return _memory_pool->get_memory(layout, id, get_id(), dependencies, type, reusable); return _memory_pool->get_memory(layout, type); } diff --git a/src/plugins/intel_gpu/src/graph/pass_manager.cpp b/src/plugins/intel_gpu/src/graph/pass_manager.cpp index 0cfd5c61bf5..e16b3fdd5e5 100644 --- a/src/plugins/intel_gpu/src/graph/pass_manager.cpp +++ b/src/plugins/intel_gpu/src/graph/pass_manager.cpp @@ -15,7 +15,7 @@ pass_manager::pass_manager(program& p) { pass_count = 0; - auto path = 
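
execute_impl above now stores per-primitive events whenever the queue is out-of-order or ov::enable_profiling is set in the config. From the public API this is just the standard profiling flag; a minimal usage sketch (the model path is hypothetical):

    #include <iostream>
    #include <openvino/openvino.hpp>

    int main() {
        ov::Core core;
        auto model = core.read_model("model.xml");  // hypothetical model
        auto compiled = core.compile_model(model, "GPU", ov::enable_profiling(true));
        auto request = compiled.create_infer_request();
        request.infer();
        for (const auto& info : request.get_profiling_info())  // populated from the stored events
            std::cout << info.node_name << ": " << info.real_time.count() << " us\n";
        return 0;
    }
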
get_dir_path(p.get_options()); + auto path = get_dir_path(p.get_config()); if (!path.empty()) { graph_opt_log.open(path + std::to_string(p.get_prog_id()) + "_cldnn_graph_optimizer.log"); if (graph_opt_log.is_open()) { diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index e310000104a..44fac593673 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -96,9 +96,13 @@ void primitive_inst::check_memory_to_set(const memory& mem, const layout& layout // check shared image/buffer compatibility, if applicable auto params = mem.get_internal_params(); if (params.mem_type != shared_mem_type::shared_mem_empty) { - if (!mem.is_allocated_by(get_network().get_engine())) { - CLDNN_ERROR_MESSAGE(_node->id(), "Memory object is not suitable"); - } + auto& net_engine = get_network().get_engine(); + auto& mem_engine = *mem.get_engine(); + OPENVINO_ASSERT(mem.is_allocated_by(net_engine), "[GPU] Can't set memory due to engines mismatch. ", + "Network was created for ", &net_engine, " (", + net_engine.get_device_info().dev_name, ") engine", + " while memory object was allocated for ", &mem_engine, "(", + mem_engine.get_device_info().dev_name, ")"); switch (params.mem_type) { case shared_mem_type::shared_mem_vasurface: @@ -182,7 +186,7 @@ void primitive_inst::update_shape() { auto& dep = _node->get_dependency(i); auto dep_id = dep.id(); // Events may be not created for in-order queue, so take them for OOO queue only - if (_network.has_event(dep.id()) && queue_type == queue_types::out_of_order) { + if (_network.has_event(dep.id()) && queue_type == QueueTypes::out_of_order) { dependencies_events.push_back(_network.get_primitive_event(dep_id)); GPU_DEBUG_TRACE_DETAIL << id() << ": shape infer waits for " << i << " dependency\n"; } @@ -192,9 +196,9 @@ void primitive_inst::update_shape() { } if (has_runtime_deps) { - if (!dependencies_events.empty() && queue_type == queue_types::out_of_order) { + if (!dependencies_events.empty() && queue_type == QueueTypes::out_of_order) { _network.get_stream().wait_for_events(dependencies_events); - } else if (queue_type == queue_types::in_order) { + } else if (queue_type == QueueTypes::in_order) { _network.get_stream().finish(); } } @@ -446,7 +450,7 @@ event::ptr primitive_inst::execute(const std::vector& events) { dependencies = events; } else { auto queue_type = get_network().get_stream().get_queue_type(); - if (queue_type == queue_types::out_of_order) { + if (queue_type == QueueTypes::out_of_order) { dependencies.reserve(dependencies.size() + _exec_deps.size()); for (auto& input : _exec_deps) { auto id = input->id(); @@ -755,8 +759,8 @@ memory::ptr primitive_inst::allocate_output(engine& _engine, memory_pool& pool, uint32_t net_id, bool is_internal, size_t idx) { auto get_memory_from_pool = [&](engine& _engine, const layout& layout, const primitive_id id, std::set dependencies, allocation_type type, bool reusable) { - if (_engine.configuration().use_memory_pool) - return pool.get_memory(layout, id, net_id, dependencies, type, reusable); + if (_node.get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) + return pool.get_memory(layout, id, net_id, dependencies, type, reusable); return pool.get_memory(layout, type); }; @@ -933,10 +937,11 @@ cldnn::network::ptr primitive_inst::get_unfused_subgraph() { in = _node->get_dependency(i).id(); } } - build_options bo; - bo.set_option(build_option::allow_static_input_reorder(true)); - 
bo.set_option(build_option::allow_new_shape_infer(true)); - auto prog = program::build_program(get_network().get_engine(), t, bo, true, false); + ExecutionConfig subgraph_config{ + ov::intel_gpu::allow_static_input_reorder(true), + ov::intel_gpu::allow_new_shape_infer(true) + }; + auto prog = program::build_program(get_network().get_engine(), t, subgraph_config, true, false); _unfused_subgraph = network::allocate_network(get_network().get_stream_ptr(), prog, true, get_network().is_primary_stream()); } diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 0224e7ddc30..3d90162b455 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -8,6 +8,8 @@ #include "intel_gpu/runtime/debug_configuration.hpp" #include "intel_gpu/graph/program.hpp" +#include + #include "kernel_selector_helper.h" #include "device_cache_reader.h" #include "auto_tuner.h" @@ -98,13 +100,13 @@ using namespace ov::intel_gpu; program::program(engine& engine_ref, topology const& topology, - build_options const& options, + const ExecutionConfig& config, bool is_internal, bool no_optimizations, bool is_body_program) : _engine(engine_ref), - _stream(_engine.create_stream()), - options(options), + _stream(_engine.create_stream(config)), + _config(config), processing_order(), tuning_cache(nullptr), is_body_program(is_body_program), @@ -112,10 +114,13 @@ program::program(engine& engine_ref, init_primitives(); set_options(); query_local_block_io_supported(); + _task_executor = make_task_executor(_config); + + GPU_DEBUG_INFO << "Program config\n" << config.to_string(); pm = std::unique_ptr(new pass_manager(*this)); prepare_nodes(topology); - _kernels_cache = std::unique_ptr(new kernels_cache(_engine, prog_id, + _kernels_cache = std::unique_ptr(new kernels_cache(_engine, _config, prog_id, _task_executor, kernel_selector::KernelBase::get_db().get_batch_header_str())); program_node::reset_unique_id(); @@ -128,11 +133,13 @@ program::program(engine& engine_ref, program::program(engine& engine_ref, std::set> const& nodes, - build_options const& options, + const ExecutionConfig& config, + std::shared_ptr task_executor, bool is_internal) : _engine(engine_ref), - _stream(_engine.create_stream()), - options(options), + _stream(_engine.create_stream(config)), + _config(config), + _task_executor(task_executor), processing_order(), tuning_cache(nullptr), is_subgroup_local_block_io_supported(-1) { @@ -140,7 +147,9 @@ program::program(engine& engine_ref, set_options(); query_local_block_io_supported(); - _kernels_cache = std::unique_ptr(new kernels_cache(_engine, prog_id, + _task_executor = make_task_executor(_config); + + _kernels_cache = std::unique_ptr(new kernels_cache(_engine, _config, prog_id, _task_executor, kernel_selector::KernelBase::get_db().get_batch_header_str())); pm = std::unique_ptr(new pass_manager(*this)); prepare_nodes(nodes); @@ -149,8 +158,8 @@ program::program(engine& engine_ref, program::program(engine& engine) : _engine(engine), - _stream(_engine.create_stream()), - options(build_options()), + _stream(_engine.create_stream({})), + _config(), processing_order(), tuning_cache(nullptr), is_subgroup_local_block_io_supported(-1) { } @@ -171,6 +180,42 @@ void program::init_primitives() { } } +static void adjust_num_cores(InferenceEngine::CPUStreamsExecutor::Config& config) { + if (InferenceEngine::getAvailableCoresTypes().size() == 1) { + return; + } + + const auto total_num_cores = 
InferenceEngine::getNumberOfLogicalCPUCores(); + const auto total_num_big_cores = InferenceEngine::getNumberOfLogicalCPUCores(true); + const auto total_num_little_cores = total_num_cores - total_num_big_cores; + auto core_type = config._threadPreferredCoreType; + + int num_cores = total_num_cores; + if (core_type == InferenceEngine::IStreamsExecutor::Config::BIG) { + num_cores = total_num_big_cores; + } else if (core_type == InferenceEngine::IStreamsExecutor::Config::LITTLE) { + num_cores = total_num_little_cores; + } + + config._streams = std::min(config._streams, num_cores); +} + +std::shared_ptr program::make_task_executor(const ExecutionConfig& config) const { + InferenceEngine::CPUStreamsExecutor::Config task_executor_config("CPU Tasks executor for GPU plugin", 1); + task_executor_config._streams = config.get_property(ov::compilation_num_threads); + auto priority = config.get_property(ov::intel_gpu::hint::host_task_priority); + switch (priority) { + case ov::hint::Priority::LOW: task_executor_config._threadPreferredCoreType = InferenceEngine::IStreamsExecutor::Config::LITTLE; break; + case ov::hint::Priority::MEDIUM: task_executor_config._threadPreferredCoreType = InferenceEngine::IStreamsExecutor::Config::ANY; break; + case ov::hint::Priority::HIGH: task_executor_config._threadPreferredCoreType = InferenceEngine::IStreamsExecutor::Config::BIG; break; + default: OPENVINO_ASSERT(false, "[GPU] Can't create task executor: invalid host task priority value: ", priority); + } + + adjust_num_cores(task_executor_config); + + return std::make_shared(task_executor_config); +} + void program::compile() { GPU_DEBUG_DEFINE_MEM_LOGGER("compile"); _kernels_cache->build_all(); @@ -190,7 +235,7 @@ void program::load_tuning_cache() { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "ProgramImpl::LoadTuningCache"); GPU_DEBUG_DEFINE_MEM_LOGGER("ProgramImpl::LoadTuningCache"); try { - tuning_cache = kernel_selector::CreateTuningCacheFromFile(get_engine().configuration().tuning_cache_path); + tuning_cache = kernel_selector::CreateTuningCacheFromFile("cache.json"); } catch (...) 
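
make_task_executor above maps ov::intel_gpu::hint::host_task_priority onto big/little core types and clamps the executor's stream count by ov::compilation_num_threads. Both are ordinary compile-time properties; a usage sketch (the model path is hypothetical):

    #include <openvino/openvino.hpp>
    #include <openvino/runtime/intel_gpu/properties.hpp>

    int main() {
        ov::Core core;
        auto model = core.read_model("model.xml");  // hypothetical model
        auto compiled = core.compile_model(model, "GPU",
            ov::compilation_num_threads(4),                                      // host threads used while building the program
            ov::intel_gpu::hint::host_task_priority(ov::hint::Priority::HIGH));  // HIGH -> big cores on hybrid CPUs
        return 0;
    }
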
{ tuning_cache = std::make_shared(); } @@ -210,18 +255,19 @@ kernels_cache& program::get_kernels_cache() const { program::ptr program::build_program(engine& engine, const topology& topology, - const build_options& options, + const ExecutionConfig& config, bool is_internal, bool no_optimizations, bool is_body_program) { - return std::make_shared(engine, topology, options, is_internal, no_optimizations, is_body_program); + return std::make_shared(engine, topology, config, is_internal, no_optimizations, is_body_program); } program::ptr program::build_program(engine& engine, const std::set>& nodes, - const build_options& options, + const ExecutionConfig& config, + std::shared_ptr task_executor, bool is_internal) { - return std::make_shared(engine, nodes, options, is_internal); + return std::make_shared(engine, nodes, config, task_executor, is_internal); } program_node& program::get_node(primitive_id const& id) { @@ -449,20 +495,8 @@ void program::set_options() { static std::atomic id_gen{0}; prog_id = ++id_gen; assert(prog_id != 0); - - if ((options.get()->config.mode == tuning_mode::tuning_tune_and_cache || - options.get()->config.mode == tuning_mode::tuning_retune_and_cache) && - !_engine.configuration().enable_profiling) { - throw std::invalid_argument("Engine must be created with profiling enabled in tune_and_cache mode!"); - } - - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - options.set_option(cldnn::build_option::graph_dumps_dir(debug_config->dump_graphs)); - } - - if (!options.get()->forcing.empty()) { - options.set_option(build_option::optimize_data(true)); + if (!_config.get_property(ov::intel_gpu::force_implementations).empty()) { + _config.set_property(ov::intel_gpu::optimize_data(true)); } } @@ -502,8 +536,8 @@ void program::query_local_block_io_supported() { kernel_string->batch_compilation = true; try { - auto _kernels_cache_device_query = std::unique_ptr(new kernels_cache(_engine, prog_id, - kernel_selector::KernelBase::get_db().get_batch_header_str())); + auto _kernels_cache_device_query = std::unique_ptr(new kernels_cache(_engine, _config, prog_id, nullptr, + kernel_selector::KernelBase::get_db().get_batch_header_str())); auto id = _kernels_cache_device_query->set_kernel_source(kernel_string, false); _kernels_cache_device_query->build_all(); @@ -533,7 +567,7 @@ void program::build_program(bool is_internal) { #endif prepare_memory_dependencies(); - if (options.get()->enabled()) { + if (_config.get_property(ov::intel_gpu::partial_build_program)) { return; } @@ -582,7 +616,8 @@ void program::pre_optimize_graph(bool is_internal) { node->get_output_layouts(); } - if (options.get()->enabled()) { + bool optimize_data = _config.get_property(ov::intel_gpu::optimize_data); + if (optimize_data) { apply_opt_pass(); } @@ -590,7 +625,7 @@ void program::pre_optimize_graph(bool is_internal) { set_layout_optimizer_attributes(lo); reorder_factory rf; - if (options.get()->enabled()) { + if (optimize_data) { apply_opt_pass(); apply_opt_pass(lo); @@ -623,7 +658,7 @@ void program::pre_optimize_graph(bool is_internal) { apply_opt_pass(output_size_handling_enabled); - apply_opt_pass(lo, options.get()->enabled()); + apply_opt_pass(lo, optimize_data); if (!is_internal) { // ToDo remove hidden dependencies from propagate_constants pass @@ -631,7 +666,7 @@ void program::pre_optimize_graph(bool is_internal) { } // try to fuse buffers (i.e. 
depth_concat in bfyx format) after padding calculations - if (options.get()->enabled()) { + if (optimize_data) { apply_opt_pass(); } @@ -653,17 +688,18 @@ void program::post_optimize_graph(bool is_internal) { apply_opt_pass(lo, false, true); // TODO: do we need it at this place also? + auto partial_build = _config.get_property(ov::intel_gpu::partial_build_program); #ifdef GPU_DEBUG_CONFIG GPU_DEBUG_GET_INSTANCE(debug_config); - if (!is_internal && (!options.get()->enabled() || !debug_config->dry_run_path.empty())) { + if (!is_internal && (!partial_build || !debug_config->dry_run_path.empty())) { #else - if (!is_internal && !options.get()->enabled()) { + if (!is_internal && !partial_build) { #endif // ToDo remove hidden dependencies from propagate_constants pass apply_opt_pass(); } - if (options.get()->enabled()) + if (_config.get_property(ov::intel_gpu::optimize_data)) apply_opt_pass(lo, false, true, true); // pass to remove output reorders while all others graph optimizations were done // update loop input/output primitive mappings @@ -743,17 +779,6 @@ void program::cleanup() { for (auto& node : processing_order) node->get_output_layout(); - // in debug build, at the end, mark all nodes as outputs so user can query for buffers of all not-optimized nodes, - // including internal ones etc. - if (is_debug_build()) { - for (auto& node : processing_order) { - if (!node->is_output()) { - node->set_output(true); - outputs.push_back(node); - } - } - } - _kernels_cache->reset(); } @@ -786,7 +811,7 @@ program::nodes_ordering& program::get_processing_order() { return processing_ord const program::nodes_ordering& program::get_processing_order() const { return processing_order; } void program::prepare_memory_dependencies() { - if (!get_engine().configuration().use_memory_pool) + if (!_config.get_property(ov::intel_gpu::enable_memory_pool)) return; apply_opt_pass(); @@ -1046,7 +1071,7 @@ bool program::remove_if_dangling(program_node& node) { if (!node.dependencies.empty()) return false; - if (!node.is_output() || is_debug_build()) { + if (!node.is_output()) { if (node.is_input()) inputs.remove(&node); @@ -1062,7 +1087,7 @@ bool program::extract(program_node& node) { if (node.get_dependencies().size() != 1) return false; - if (node.is_output() && !is_debug_build()) { + if (node.is_output()) { auto& prev = node.get_dependency(0); auto node_id = node.id(); @@ -1248,7 +1273,7 @@ void program::remove_nodes(std::vector& to_remove) { void program::dump_program(const char* stage, bool with_full_info, std::function const& filter) const { - std::string path = get_dir_path(options); + std::string path = get_dir_path(_config); if (path.empty() || !with_full_info) { return; } @@ -1372,7 +1397,7 @@ program::primitives_info program::get_current_stage_info() const { void program::save_pass_info(std::string pass_name) { // TODO: Directory path here can be probably changed to some bool flag - if (!options.get()->directory_path.empty()) + if (!_config.get_property(ov::intel_gpu::dump_graphs).empty()) optimizer_passes_info.emplace_back(pass_name, get_current_stage_info()); } @@ -1400,7 +1425,8 @@ const program::primitives_info& program::get_primitives_info() const { return pr void program::apply_opt_pass(base_pass& pass) { pm->run(*this, pass); } void program::set_layout_optimizer_attributes(layout_optimizer& lo) { - lo.set_implementation_forcing(options.get()->forcing); + lo.set_implementation_forcing(_config.get_property(ov::intel_gpu::force_implementations)); + // first pass to set layout optimization_attributes for 
topology bool can_use_fsv16 = true; @@ -1625,7 +1651,7 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) { auto& engine = get_engine(); if (engine.get_device_info().supports_immad && engine.get_device_info().vendor_id == INTEL_VENDOR_ID && - engine.configuration().queue_type == queue_types::in_order) + get_config().get_property(ov::intel_gpu::queue_type) == QueueTypes::in_order) lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, 1); #endif } diff --git a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp index 211bf8caf4f..af6f8a2595b 100644 --- a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp +++ b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp @@ -139,8 +139,8 @@ std::string get_node_id(const program_node* ptr) { return "node_" + std::to_stri void dump_full_node(std::ofstream& out, const program_node* node) { out << node->type()->to_string(*node); } } // namespace -std::string get_dir_path(build_options opts) { - auto path = opts.get()->directory_path; +std::string get_dir_path(const ExecutionConfig& config) { + auto path = config.get_property(ov::intel_gpu::dump_graphs); if (path.empty()) { return {}; } @@ -151,15 +151,6 @@ std::string get_dir_path(build_options opts) { return path; } -/// Returns given name for serialization process. -inline std::string get_serialization_network_name(build_options opts) { - return opts.get()->serialization_network_name; -} - -inline std::string get_load_program_name(build_options opts) { - return opts.get()->load_program_name; -} - void dump_graph_init(std::ofstream& graph, const program& program, std::function const& filter) { diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index b5863898d58..f2f956b6fd6 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -245,8 +245,7 @@ bool program_node::is_detached(bool whole_branch) { } layout program_node::calc_output_layout() const { - bool allow_new_shape_infer = - get_program().get_options().get()->enabled(); + bool allow_new_shape_infer = get_program().get_config().get_property(ov::intel_gpu::allow_new_shape_infer); if (allow_new_shape_infer) { auto out_layouts = type()->calc_output_layouts(*this, *get_kernel_impl_params()); if (!out_layouts.empty()) { @@ -262,8 +261,7 @@ layout program_node::calc_output_layout() const { } std::vector program_node::calc_output_layouts() const { - bool allow_new_shape_infer = - get_program().get_options().get()->enabled(); + bool allow_new_shape_infer = get_program().get_config().get_property(ov::intel_gpu::allow_new_shape_infer); if (allow_new_shape_infer) { auto out_layouts = type()->calc_output_layouts(*this, *get_kernel_impl_params()); if (!out_layouts.empty()) @@ -802,7 +800,7 @@ dnnl::post_ops program_node::try_optimize_post_ops(dnnl::post_ops& p_ops, const memory::ptr cur_bin_mem_ptr = cur_node.as().get_attached_memory_ptr(); if (cur_bin_mem_ptr == nullptr) throw std::runtime_error("OneDNN post-ops optimization error: nonexistent node for bin + eltw"); - auto& stream = cur_bin_mem_ptr->get_engine()->get_program_stream(); + auto& stream = cur_bin_mem_ptr->get_engine()->get_service_stream(); mem_lock bin_and_eltw_lock(cur_bin_mem_ptr, stream); size_t cur_bin_mem_size = cur_node.get_output_layout().count(); @@ -844,7 +842,7 @@ dnnl::post_ops program_node::try_optimize_post_ops(dnnl::post_ops& p_ops, 
const memory::ptr prev_bin_mem_ptr = prev_node.as().get_attached_memory_ptr(); if (prev_bin_mem_ptr == nullptr) throw std::runtime_error("OneDNN post-ops optimization error: nonexistent node for eltw + bin"); - auto& stream = prev_bin_mem_ptr->get_engine()->get_program_stream(); + auto& stream = prev_bin_mem_ptr->get_engine()->get_service_stream(); mem_lock eltw_and_bin_lock(prev_bin_mem_ptr, stream); size_t prev_bin_mem_size = prev_node.get_output_layout().count(); @@ -932,7 +930,7 @@ dnnl::post_ops program_node::try_optimize_post_ops(dnnl::post_ops& p_ops, const memory::ptr prev_scale_mem_ptr = prev_node.as().get_attached_memory_ptr(); if (prev_scale_mem_ptr == nullptr) throw std::runtime_error("OneDNN post-ops optimization error: nonexistent node for eltw + scale"); - auto& stream = prev_scale_mem_ptr->get_engine()->get_program_stream(); + auto& stream = prev_scale_mem_ptr->get_engine()->get_service_stream(); mem_lock eltw_and_scale_lock(prev_scale_mem_ptr, stream); size_t prev_scale_mem_size = prev_node.get_output_layout().count(); diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index b8b5d54d387..71b54df3704 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -11,6 +11,7 @@ #include "intel_gpu/plugin/compiled_model.hpp" #include "intel_gpu/plugin/async_infer_request.hpp" #include "intel_gpu/plugin/async_infer_request_legacy.hpp" +#include "intel_gpu/plugin/legacy_api_helper.hpp" #include "openvino/runtime/intel_gpu/properties.hpp" #include @@ -35,30 +36,27 @@ using namespace InferenceEngine::details; namespace ov { namespace intel_gpu { -CompiledModel::CompiledModel(InferenceEngine::CNNNetwork &network, std::shared_ptr context, Config config) : +CompiledModel::CompiledModel(InferenceEngine::CNNNetwork &network, + InferenceEngine::RemoteContext::Ptr context, + const ExecutionConfig& config) : InferenceEngine::ExecutableNetworkThreadSafeDefault{[&]() -> InferenceEngine::ITaskExecutor::Ptr { - if (config.exclusiveAsyncRequests) { + if (config.get_property(ov::intel_gpu::exclusive_async_requests)) { //exclusiveAsyncRequests essentially disables the streams (and hence should be checked first) => aligned with the CPU behavior return executorManager()->getExecutor("GPU"); - } else if (config.throughput_streams > 1) { + } else if (config.get_property(ov::num_streams) > 1) { return std::make_shared( - IStreamsExecutor::Config{"Intel GPU plugin executor", config.throughput_streams}); + IStreamsExecutor::Config{"Intel GPU plugin executor", config.get_property(ov::num_streams)}); } else { return std::make_shared( IStreamsExecutor::Config{"Intel GPU plugin executor", 1}); } }()}, + m_context(context), m_config(config), m_taskExecutor{ _taskExecutor }, m_waitExecutor(executorManager()->getIdleCPUStreamsExecutor({ "GPUWaitExecutor" })) { - auto casted_context = std::dynamic_pointer_cast(context); - - OPENVINO_ASSERT((casted_context != nullptr), "Invalid remote context"); - - m_context = casted_context; - - auto graph_base = std::make_shared(network, m_context, m_config, 0); - for (uint16_t n = 0; n < m_config.throughput_streams; n++) { + auto graph_base = std::make_shared(network, get_context_impl(m_context), m_config, 0); + for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) { auto graph = n == 0 ? 
graph_base : std::make_shared(graph_base, n); m_graphs.push_back(graph); } @@ -87,29 +85,27 @@ static InferenceEngine::Layout layout_from_string(const std::string & name) { IE_THROW(NetworkNotRead) << "Unknown layout with name '" << name << "'"; } -CompiledModel::CompiledModel(std::istream& networkModel, std::shared_ptr context, Config config) : +CompiledModel::CompiledModel(std::istream& networkModel, InferenceEngine::RemoteContext::Ptr context, const ExecutionConfig& config) : InferenceEngine::ExecutableNetworkThreadSafeDefault{[&]() -> InferenceEngine::ITaskExecutor::Ptr { - if (config.exclusiveAsyncRequests) { + if (config.get_property(ov::intel_gpu::exclusive_async_requests)) { //exclusiveAsyncRequests essentially disables the streams (and hence should be checked first) => aligned with the CPU behavior return executorManager()->getExecutor("GPU"); - } else if (config.throughput_streams > 1) { + } else if (config.get_property(ov::num_streams) > 1) { return std::make_shared( - IStreamsExecutor::Config{"Intel GPU plugin executor", config.throughput_streams}); + IStreamsExecutor::Config{"Intel GPU plugin executor", config.get_property(ov::num_streams)}); } else { return std::make_shared( IStreamsExecutor::Config{"Intel GPU plugin executor", 1}); } }()}, + m_context(context), m_config(config), m_taskExecutor{ _taskExecutor }, m_waitExecutor(executorManager()->getIdleCPUStreamsExecutor({ "GPUWaitExecutor" })) { - auto casted_context = std::dynamic_pointer_cast(context); + auto context_impl = get_context_impl(m_context); + auto& engine = context_impl->get_engine(); - OPENVINO_ASSERT((casted_context != nullptr), "Invalid remote context"); - - m_context = casted_context; - - cldnn::BinaryInputBuffer ib(networkModel, *getContextImpl(m_context)->GetEngine()); + cldnn::BinaryInputBuffer ib(networkModel, engine); // InputsInfo and OutputsInfor for CNNNetwork { @@ -255,8 +251,8 @@ CompiledModel::CompiledModel(std::istream& networkModel, std::shared_ptr(ib, m_context, m_config, 0); - for (uint16_t n = 0; n < m_config.throughput_streams; n++) { + auto graph_base = std::make_shared(ib, context_impl, m_config, 0); + for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) { auto graph = n == 0 ? 
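
As the CompiledModel constructors above show, the executor and per-stream graph count now come from ov::num_streams in the ExecutionConfig, and GetMetric below doubles that number for non-latency hints when reporting ov::optimal_number_of_infer_requests. A public-API sketch (the model path is hypothetical):

    #include <iostream>
    #include <openvino/openvino.hpp>

    int main() {
        ov::Core core;
        auto model = core.read_model("model.xml");  // hypothetical model
        auto compiled = core.compile_model(model, "GPU",
            ov::num_streams(2),
            ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT));
        // 2 streams with a THROUGHPUT hint -> 2 * 2 = 4 per the GetMetric logic in this patch
        std::cout << compiled.get_property(ov::optimal_number_of_infer_requests) << "\n";
        return 0;
    }
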
graph_base : std::make_shared(graph_base, n); m_graphs.push_back(graph); } @@ -266,9 +262,9 @@ template IInferRequestInternal::Ptr CompiledModel::GetInferRequestImpl(const std::vector>& inputs, const std::vector>& outputs) { auto ptr = std::make_shared(inputs, outputs, std::static_pointer_cast(shared_from_this())); - if (m_config.throughput_streams > 1) + if (m_config.get_property(ov::num_streams) > 1) ptr->EnableStreams(); - if (m_config.useProfiling) + if (m_config.get_property(ov::enable_profiling)) ptr->EnableProfiling(); if (m_graphs.front()->use_external_queue()) ptr->enable_external_queue(); @@ -282,9 +278,9 @@ IInferRequestInternal::Ptr CompiledModel::CreateInferRequestImpl(InputsDataMap n OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::CreateInferRequestImpl"); auto ptr = std::make_shared(networkInputs, networkOutputs, std::static_pointer_cast(shared_from_this())); - if (m_config.throughput_streams > 1) + if (m_config.get_property(ov::num_streams) > 1) ptr->EnableStreams(); - if (m_config.useProfiling) + if (m_config.get_property(ov::enable_profiling)) ptr->EnableProfiling(); if (m_graphs.front()->use_external_queue()) ptr->enable_external_queue(); @@ -469,50 +465,17 @@ std::shared_ptr CompiledModel::GetExecGraphInfo() { } InferenceEngine::Parameter CompiledModel::GetConfig(const std::string &name) const { - const bool is_new_api = _plugin->IsNewAPI(); - auto it = m_config.key_config_map.find(name); - if (it != m_config.key_config_map.end()) { - std::string val = it->second; - if (is_new_api) { - if (name == ov::enable_profiling) { - return val == PluginConfigParams::YES ? true : false; - } else if (name == ov::hint::model_priority) { - return ov::util::from_string(val, ov::hint::model_priority); - } else if (name == ov::intel_gpu::hint::host_task_priority) { - return ov::util::from_string(val, ov::intel_gpu::hint::host_task_priority); - } else if (name == ov::intel_gpu::hint::queue_priority) { - return ov::util::from_string(val, ov::intel_gpu::hint::queue_priority); - } else if (name == ov::intel_gpu::hint::queue_throttle) { - return ov::util::from_string(val, ov::intel_gpu::hint::queue_throttle); - } else if (name == ov::intel_gpu::enable_loop_unrolling) { - return val == PluginConfigParams::YES ? 
true : false; - } else if (name == ov::cache_dir) { - return ov::util::from_string(val, ov::cache_dir); - } else if (name == ov::hint::performance_mode) { - return ov::util::from_string(val, ov::hint::performance_mode); - } else if (name == ov::compilation_num_threads) { - return ov::util::from_string(val, ov::compilation_num_threads); - } else if (name == ov::num_streams) { - return ov::util::from_string(val, ov::num_streams); - } else if (name == ov::hint::num_requests) { - return ov::util::from_string(val, ov::hint::num_requests); - } else if (name == ov::hint::inference_precision) { - return ov::util::from_string(val, ov::hint::inference_precision); - } else if (name == ov::device::id) { - return ov::util::from_string(val, ov::device::id); - } else { - return val; - } - } else { - if (name == PluginConfigParams::KEY_MODEL_PRIORITY || - name == GPUConfigParams::KEY_GPU_HOST_TASK_PRIORITY) - return Config::ConvertPropertyToLegacy(name, val); - else - return val; - } - } else { - IE_THROW() << "Unsupported ExecutableNetwork config key: " << name; + auto actual_name = name; + if (LegacyAPIHelper::is_legacy_property({name, nullptr}, _plugin->IsNewAPI())) { + actual_name = LegacyAPIHelper::convert_legacy_property({name, nullptr}).first; } + + auto val = m_config.get_property(actual_name); + if (LegacyAPIHelper::is_legacy_property({name, nullptr}, _plugin->IsNewAPI())) { + val = LegacyAPIHelper::convert_to_legacy_property({actual_name, val}).second; + } + + return val; } InferenceEngine::Parameter CompiledModel::GetMetric(const std::string &name) const { @@ -550,14 +513,28 @@ InferenceEngine::Parameter CompiledModel::GetMetric(const std::string &name) con metrics.push_back(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)); IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics); } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) { - std::vector configKeys; - for (auto && value : m_config.key_config_map) - if (!Config::isNewApiProperty(value.first)) - configKeys.push_back(value.first); + static const std::vector configKeys { + CONFIG_KEY(MODEL_PRIORITY), + CONFIG_KEY(PERFORMANCE_HINT), + CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS), + CONFIG_KEY(PERF_COUNT), + CONFIG_KEY(DYN_BATCH_ENABLED), + CONFIG_KEY(CONFIG_FILE), + CONFIG_KEY(DEVICE_ID), + CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), + CONFIG_KEY(CACHE_DIR), + CONFIG_KEY(GPU_THROUGHPUT_STREAMS), + GPU_CONFIG_KEY(PLUGIN_PRIORITY), + GPU_CONFIG_KEY(PLUGIN_THROTTLE), + GPU_CONFIG_KEY(HOST_TASK_PRIORITY), + GPU_CONFIG_KEY(NV12_TWO_INPUTS), + GPU_CONFIG_KEY(MAX_NUM_THREADS), + GPU_CONFIG_KEY(ENABLE_LOOP_UNROLLING), + }; IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys); } else if (name == ov::optimal_number_of_infer_requests) { - unsigned int nr = m_config.throughput_streams; - if (m_config.perfHintsConfig.ovPerfHint != CONFIG_VALUE(LATENCY)) + unsigned int nr = m_config.get_property(ov::num_streams); + if (m_config.get_property(ov::hint::performance_mode) != ov::hint::PerformanceMode::LATENCY) nr *= 2; return decltype(ov::optimal_number_of_infer_requests)::value_type {nr}; } else if (name == ov::execution_devices) { diff --git a/src/plugins/intel_gpu/src/plugin/device_config.cpp b/src/plugins/intel_gpu/src/plugin/device_config.cpp deleted file mode 100644 index e0fab51afff..00000000000 --- a/src/plugins/intel_gpu/src/plugin/device_config.cpp +++ /dev/null @@ -1,499 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "intel_gpu/plugin/device_config.hpp" - -#include -#include - -#include -#include - 
-#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp" -#include "file_utils.h" -#include "ie_api.h" -#include "intel_gpu/runtime/itt.hpp" -#include "openvino/runtime/intel_gpu/properties.hpp" -#include - -#ifdef _WIN32 -# include -# ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT -# define mkdir(dir, mode) _wmkdir(dir) -# else -# define mkdir(dir, mode) _mkdir(dir) -# endif // OPENVINO_ENABLE_UNICODE_PATH_SUPPORT -#endif // _WIN32 - -using namespace InferenceEngine; - -namespace ov { -namespace intel_gpu { - -static void createDirectory(std::string _path) { -#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) - std::wstring widepath = ov::util::string_to_wstring(_path.c_str()); - const wchar_t* path = widepath.c_str(); -#else - const char* path = _path.c_str(); -#endif - - auto err = mkdir(path, 0755); - if (err != 0 && errno != EEXIST) { - IE_THROW() << "Couldn't create directory! (err=" << err << "; errno=" << errno << ")"; - } -} - -static int getNumberOfCores(const IStreamsExecutor::Config::PreferredCoreType core_type) { - const auto total_num_cores = getNumberOfLogicalCPUCores(); - const auto total_num_big_cores = getNumberOfLogicalCPUCores(true); - const auto total_num_little_cores = total_num_cores - total_num_big_cores; - - int num_cores = total_num_cores; - if (core_type == IStreamsExecutor::Config::BIG) { - num_cores = total_num_big_cores; - } else if (core_type == IStreamsExecutor::Config::LITTLE) { - num_cores = total_num_little_cores; - } - return num_cores; -} - -IE_SUPPRESS_DEPRECATED_START -void Config::UpdateFromMap(const std::map& configMap, const cldnn::device_info& info) { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Config::UpdateFromMap"); - for (auto& kvp : configMap) { - std::string key = kvp.first; - std::string val = kvp.second; - const auto hints = perfHintsConfig.SupportedKeys(); - if (hints.end() != std::find(hints.begin(), hints.end(), key)) { - perfHintsConfig.SetConfig(key, val); - } else if (key == ov::hint::inference_precision) { - std::stringstream ss(val); - ss >> inference_precision; - OPENVINO_ASSERT(inference_precision == ov::element::f16 || - inference_precision == ov::element::f32 || - inference_precision == ov::element::undefined, - "Unexpected inference precision set: ", inference_precision); - } else if (key.compare(PluginConfigParams::KEY_PERF_COUNT) == 0 || key == ov::enable_profiling) { - if (val.compare(PluginConfigParams::YES) == 0) { - useProfiling = true; - } else if (val.compare(PluginConfigParams::NO) == 0) { - useProfiling = false; - } else { - IE_THROW(NotFound) << "Unsupported property value by plugin: " << val; - } - } else if (key.compare(PluginConfigParams::KEY_DYN_BATCH_ENABLED) == 0) { - if (val.compare(PluginConfigParams::YES) == 0) { - enableDynamicBatch = true; - } else if (val.compare(PluginConfigParams::NO) == 0) { - enableDynamicBatch = false; - } else { - IE_THROW(NotFound) << "Unsupported property value by plugin: " << val; - } - } else if (key.compare(GPUConfigParams::KEY_GPU_PLUGIN_PRIORITY) == 0) { - std::stringstream ss(val); - uint32_t uVal(0); - ss >> uVal; - if (ss.fail()) { - IE_THROW(NotFound) << "Unsupported property value by plugin: " << val; - } - switch (uVal) { - case 0: - case 2: - queuePriority = cldnn::priority_mode_types::med; - break; - case 1: - queuePriority = cldnn::priority_mode_types::low; - break; - case 3: - queuePriority = cldnn::priority_mode_types::high; - break; - default: - IE_THROW(ParameterMismatch) << "Unsupported queue priority value: " << uVal; - } - } 
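
The hand-rolled parsing above (removed together with device_config.cpp) translated raw legacy strings, such as the GPU_PLUGIN_PRIORITY integers, into cldnn enums field by field and then mirrored them back into a string key/value map. After this patch the plugin-side configuration is a typed property store. Below is a minimal sketch of the replacement style, assuming the ExecutionConfig overloads used at the call sites elsewhere in this patch (set_property taking a property-value pair, get_property returning the property's C++ type) and assuming the namespace and header path introduced by the patch; it is illustration only, not part of the change itself.

    // Minimal sketch, not part of the patch: property access in the style used by
    // graph.cpp / compiled_model.cpp in this change. The ExecutionConfig namespace,
    // header path, and exact overloads are assumed from those call sites.
    #include "intel_gpu/runtime/execution_config.hpp"
    #include "openvino/runtime/intel_gpu/properties.hpp"

    void configure_example(ov::intel_gpu::ExecutionConfig& config) {
        // Writes take strongly typed property objects instead of string key/value pairs.
        config.set_property(ov::num_streams(2));
        config.set_property(ov::hint::model_priority(ov::hint::Priority::HIGH));
        // Reads come back already converted, so no per-key string parsing is needed.
        if (config.get_property(ov::enable_profiling)) {
            auto streams = config.get_property(ov::num_streams);
            (void)streams;
        }
    }
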
else if (key == ov::intel_gpu::hint::queue_priority) { - std::stringstream ss(val); - ov::hint::Priority priority; - ss >> priority; - if (priority == ov::hint::Priority::HIGH) - queuePriority = cldnn::priority_mode_types::high; - else if (priority == ov::hint::Priority::MEDIUM) - queuePriority = cldnn::priority_mode_types::med; - else - queuePriority = cldnn::priority_mode_types::low; - } else if (key.compare(PluginConfigParams::KEY_MODEL_PRIORITY) == 0 || key == ov::hint::model_priority) { - if (val.compare(PluginConfigParams::MODEL_PRIORITY_HIGH) == 0 || - val.compare(ov::util::to_string(ov::hint::Priority::HIGH)) == 0) { - queuePriority = cldnn::priority_mode_types::high; - task_exec_config._threadPreferredCoreType = IStreamsExecutor::Config::BIG; - } else if (val.compare(PluginConfigParams::MODEL_PRIORITY_MED) == 0 || - val.compare(ov::util::to_string(ov::hint::Priority::MEDIUM)) == 0) { - queuePriority = cldnn::priority_mode_types::med; - task_exec_config._threadPreferredCoreType = IStreamsExecutor::Config::ANY; - } else if (val.compare(PluginConfigParams::MODEL_PRIORITY_LOW) == 0 || - val.compare(ov::util::to_string(ov::hint::Priority::LOW)) == 0) { - queuePriority = cldnn::priority_mode_types::low; - task_exec_config._threadPreferredCoreType = IStreamsExecutor::Config::LITTLE; - } else { - IE_THROW() << "Not found appropriate value for config key " << PluginConfigParams::KEY_MODEL_PRIORITY - << ".\n"; - } - if (getAvailableCoresTypes().size() > 1) { - if (task_exec_config._threadPreferredCoreType == IStreamsExecutor::Config::BIG || - task_exec_config._threadPreferredCoreType == IStreamsExecutor::Config::LITTLE) { - task_exec_config._streams = std::min(task_exec_config._streams, - getNumberOfCores(task_exec_config._threadPreferredCoreType)); - } - } else { - task_exec_config._threadPreferredCoreType = IStreamsExecutor::Config::ANY; - task_exec_config._streams = - std::min(task_exec_config._streams, static_cast(std::thread::hardware_concurrency())); - } - } else if (key.compare(GPUConfigParams::KEY_GPU_PLUGIN_THROTTLE) == 0) { - std::stringstream ss(val); - uint32_t uVal(0); - ss >> uVal; - if (ss.fail()) { - IE_THROW(NotFound) << "Unsupported property value by plugin: " << val; - } - switch (uVal) { - case 0: - case 2: - queueThrottle = cldnn::throttle_mode_types::med; - break; - case 1: - queueThrottle = cldnn::throttle_mode_types::low; - break; - case 3: - queueThrottle = cldnn::throttle_mode_types::high; - break; - default: - IE_THROW(ParameterMismatch) << "Unsupported queue throttle value: " << uVal; - } - } else if (key == ov::intel_gpu::hint::queue_throttle) { - std::stringstream ss(val); - ov::intel_gpu::hint::ThrottleLevel throttle; - ss >> throttle; - if (throttle == ov::intel_gpu::hint::ThrottleLevel::HIGH) - queueThrottle = cldnn::throttle_mode_types::high; - else if (throttle == ov::intel_gpu::hint::ThrottleLevel::MEDIUM) - queueThrottle = cldnn::throttle_mode_types::med; - else - queueThrottle = cldnn::throttle_mode_types::low; - } else if (key.compare(PluginConfigParams::KEY_CONFIG_FILE) == 0) { - std::stringstream ss(val); - std::istream_iterator begin(ss); - std::istream_iterator end; - std::vector configFiles(begin, end); - for (auto& file : configFiles) { - CustomLayer::LoadFromFile(file, customLayers); - } - } else if (key.compare(PluginConfigParams::KEY_CACHE_DIR) == 0 || key == ov::cache_dir) { - if (!val.empty()) { - kernels_cache_dir = val; - createDirectory(kernels_cache_dir); - } - } else if (key.compare(PluginConfigParams::KEY_EXCLUSIVE_ASYNC_REQUESTS) == 
0) { - if (val.compare(PluginConfigParams::YES) == 0) { - exclusiveAsyncRequests = true; - } else if (val.compare(PluginConfigParams::NO) == 0) { - exclusiveAsyncRequests = false; - } else { - IE_THROW(NotFound) << "Unsupported property value by plugin: " << val; - } - } else if (key.compare(PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS) == 0 || key == ov::num_streams) { - if (val.compare(PluginConfigParams::GPU_THROUGHPUT_AUTO) == 0 || - val.compare(ov::util::to_string(ov::streams::AUTO)) == 0) { - throughput_streams = std::max(GetDefaultNStreamsForThroughputMode(), info.num_ccs); - } else { - int val_i; - try { - val_i = std::stoi(val); - } catch (const std::exception&) { - IE_THROW() << "Wrong value for property key " << PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS - << ". Expected only positive numbers (#streams) or " - << "PluginConfigParams::GPU_THROUGHPUT_AUTO"; - } - if (val_i > 0) - throughput_streams = static_cast(val_i); - } - } else if (key.compare(PluginConfigParams::KEY_DEVICE_ID) == 0 || key == ov::device::id) { - // Validate if passed value is postivie number. - try { - int val_i = std::stoi(val); - (void)val_i; - } catch (const std::exception&) { - IE_THROW() << "Wrong value for property key " << ov::device::id.name() - << ". DeviceIDs are only represented by positive numbers"; - } - // Set this value. - device_id = val; - } else if (key.compare(PluginConfigInternalParams::KEY_LP_TRANSFORMS_MODE) == 0) { - if (val.compare(PluginConfigParams::YES) == 0) { - enableInt8 = true; - } else if (val.compare(PluginConfigParams::NO) == 0) { - enableInt8 = false; - } else { - IE_THROW(NotFound) << "Unsupported property value by plugin: " << val; - } - } else if (key.compare(GPUConfigParams::KEY_GPU_NV12_TWO_INPUTS) == 0) { - if (val.compare(PluginConfigParams::YES) == 0) { - nv12_two_inputs = true; - } else if (val.compare(PluginConfigParams::NO) == 0) { - nv12_two_inputs = false; - } else { - IE_THROW(NotFound) << "Unsupported NV12 flag value: " << val; - } - } else if (key.compare(GPUConfigParams::KEY_GPU_MAX_NUM_THREADS) == 0 || key == ov::compilation_num_threads) { - int max_threads = std::max(1, static_cast(std::thread::hardware_concurrency())); - try { - int val_i = std::stoi(val); - if (val_i <= 0 || val_i > max_threads) { - val_i = max_threads; - } - task_exec_config._streams = std::min(task_exec_config._streams, val_i); - } catch (const std::exception&) { - IE_THROW() << "Wrong value for property key " << GPUConfigParams::KEY_GPU_MAX_NUM_THREADS << ": " << val - << "\nSpecify the number of threads use for build as an integer." 
- << "\nOut of range value will be set as a default value, maximum concurrent threads."; - } - } else if (key.compare(GPUConfigParams::KEY_GPU_ENABLE_LOOP_UNROLLING) == 0 || - key == ov::intel_gpu::enable_loop_unrolling) { - if (val.compare(PluginConfigParams::YES) == 0) { - enable_loop_unrolling = true; - } else if (val.compare(PluginConfigParams::NO) == 0) { - enable_loop_unrolling = false; - } else { - IE_THROW(ParameterMismatch) << "Unsupported KEY_GPU_ENABLE_LOOP_UNROLLING flag value: " << val; - } - } else if (key.compare(GPUConfigParams::KEY_GPU_HOST_TASK_PRIORITY) == 0 || - key == ov::intel_gpu::hint::host_task_priority) { - if (val.compare(GPUConfigParams::GPU_HOST_TASK_PRIORITY_HIGH) == 0 || - val.compare(ov::util::to_string(ov::hint::Priority::HIGH)) == 0) { - task_exec_config._threadPreferredCoreType = IStreamsExecutor::Config::BIG; - } else if (val.compare(GPUConfigParams::GPU_HOST_TASK_PRIORITY_MEDIUM) == 0 || - val.compare(ov::util::to_string(ov::hint::Priority::MEDIUM)) == 0) { - task_exec_config._threadPreferredCoreType = IStreamsExecutor::Config::ANY; - } else if (val.compare(GPUConfigParams::GPU_HOST_TASK_PRIORITY_LOW) == 0 || - val.compare(ov::util::to_string(ov::hint::Priority::LOW)) == 0) { - task_exec_config._threadPreferredCoreType = IStreamsExecutor::Config::LITTLE; - } else { - IE_THROW(NotFound) << "Unsupported host task priority by plugin: " << val; - } - } else { - IE_THROW(NotFound) << "Unsupported property key by plugin: " << key; - } - - adjustKeyMapValues(); - } -} - -void Config::adjustKeyMapValues() { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Config::AdjustKeyMapValues"); - if (useProfiling) { - key_config_map[PluginConfigParams::KEY_PERF_COUNT] = PluginConfigParams::YES; - key_config_map[ov::enable_profiling.name()] = PluginConfigParams::YES; - } else { - key_config_map[PluginConfigParams::KEY_PERF_COUNT] = PluginConfigParams::NO; - key_config_map[ov::enable_profiling.name()] = PluginConfigParams::NO; - } - - if (exclusiveAsyncRequests) - key_config_map[PluginConfigParams::KEY_EXCLUSIVE_ASYNC_REQUESTS] = PluginConfigParams::YES; - else - key_config_map[PluginConfigParams::KEY_EXCLUSIVE_ASYNC_REQUESTS] = PluginConfigParams::NO; - - if (enableDynamicBatch) - key_config_map[PluginConfigParams::KEY_DYN_BATCH_ENABLED] = PluginConfigParams::YES; - else - key_config_map[PluginConfigParams::KEY_DYN_BATCH_ENABLED] = PluginConfigParams::NO; - - if (nv12_two_inputs) { - key_config_map[GPUConfigParams::KEY_GPU_NV12_TWO_INPUTS] = PluginConfigParams::YES; - } else { - key_config_map[GPUConfigParams::KEY_GPU_NV12_TWO_INPUTS] = PluginConfigParams::NO; - } - - key_config_map[ov::hint::inference_precision.name()] = inference_precision.get_type_name(); - - { - if (queuePriority == cldnn::priority_mode_types::high && - (task_exec_config._threadPreferredCoreType == IStreamsExecutor::Config::BIG || - getAvailableCoresTypes().size() == 1)) { - key_config_map[ov::hint::model_priority.name()] = - ov::util::to_string(ov::hint::Priority::HIGH); - } else if (queuePriority == cldnn::priority_mode_types::low && - (task_exec_config._threadPreferredCoreType == IStreamsExecutor::Config::LITTLE || - getAvailableCoresTypes().size() == 1)) { - key_config_map[ov::hint::model_priority.name()] = - ov::util::to_string(ov::hint::Priority::LOW); - } else if (queuePriority == cldnn::priority_mode_types::med && - task_exec_config._threadPreferredCoreType == IStreamsExecutor::Config::ANY) { - key_config_map[ov::hint::model_priority.name()] = - 
ov::util::to_string(ov::hint::Priority::MEDIUM); - } - } - { - std::string qp = "0"; - switch (queuePriority) { - case cldnn::priority_mode_types::low: - qp = "1"; - break; - case cldnn::priority_mode_types::med: - qp = "2"; - break; - case cldnn::priority_mode_types::high: - qp = "3"; - break; - default: - break; - } - key_config_map[GPUConfigParams::KEY_GPU_PLUGIN_PRIORITY] = qp; - } - { - std::string priority; - if (queuePriority == cldnn::priority_mode_types::high) - priority = ov::util::to_string(ov::hint::Priority::HIGH); - else if (queuePriority == cldnn::priority_mode_types::low) - priority = ov::util::to_string(ov::hint::Priority::LOW); - else - priority = ov::util::to_string(ov::hint::Priority::MEDIUM); - key_config_map[ov::intel_gpu::hint::queue_priority.name()] = priority; - } - { - std::string qt = "0"; - switch (queueThrottle) { - case cldnn::throttle_mode_types::low: - qt = "1"; - break; - case cldnn::throttle_mode_types::med: - qt = "2"; - break; - case cldnn::throttle_mode_types::high: - qt = "3"; - break; - default: - break; - } - key_config_map[GPUConfigParams::KEY_GPU_PLUGIN_THROTTLE] = qt; - } - { - std::string throttleLevel; - if (queueThrottle == cldnn::throttle_mode_types::high) - throttleLevel = ov::util::to_string(ov::intel_gpu::hint::ThrottleLevel::HIGH); - else if (queueThrottle == cldnn::throttle_mode_types::low) - throttleLevel = ov::util::to_string(ov::intel_gpu::hint::ThrottleLevel::LOW); - else - throttleLevel = ov::util::to_string(ov::intel_gpu::hint::ThrottleLevel::MEDIUM); - key_config_map[ov::intel_gpu::hint::queue_throttle.name()] = throttleLevel; - } - { - std::string hostTaskPriority; - if (task_exec_config._threadPreferredCoreType == IStreamsExecutor::Config::LITTLE) - hostTaskPriority = ov::util::to_string(ov::hint::Priority::LOW); - else if (task_exec_config._threadPreferredCoreType == IStreamsExecutor::Config::BIG) - hostTaskPriority = ov::util::to_string(ov::hint::Priority::HIGH); - else - hostTaskPriority = ov::util::to_string(ov::hint::Priority::MEDIUM); - key_config_map[ov::intel_gpu::hint::host_task_priority.name()] = hostTaskPriority; - } - - key_config_map[PluginConfigParams::KEY_CACHE_DIR] = kernels_cache_dir; - key_config_map[ov::cache_dir.name()] = kernels_cache_dir; - - key_config_map[PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS] = std::to_string(throughput_streams); - key_config_map[ov::num_streams.name()] = std::to_string(throughput_streams); - - key_config_map[PluginConfigParams::KEY_DEVICE_ID] = device_id; - key_config_map[ov::device::id.name()] = device_id; - - key_config_map[PluginConfigParams::KEY_CONFIG_FILE] = ""; - - key_config_map[GPUConfigParams::KEY_GPU_MAX_NUM_THREADS] = std::to_string(task_exec_config._streams); - key_config_map[ov::compilation_num_threads.name()] = std::to_string(task_exec_config._streams); - - if (enable_loop_unrolling) { - key_config_map[GPUConfigParams::KEY_GPU_ENABLE_LOOP_UNROLLING] = PluginConfigParams::YES; - key_config_map[ov::intel_gpu::enable_loop_unrolling.name()] = PluginConfigParams::YES; - } else { - key_config_map[GPUConfigParams::KEY_GPU_ENABLE_LOOP_UNROLLING] = PluginConfigParams::NO; - key_config_map[ov::intel_gpu::enable_loop_unrolling.name()] = PluginConfigParams::NO; - } - - key_config_map[PluginConfigParams::KEY_PERFORMANCE_HINT] = perfHintsConfig.ovPerfHint; - key_config_map[ov::hint::performance_mode.name()] = perfHintsConfig.ovPerfHint; - - key_config_map[PluginConfigParams::KEY_PERFORMANCE_HINT_NUM_REQUESTS] = - std::to_string(perfHintsConfig.ovPerfHintNumRequests); -} - -bool 
Config::isNewApiProperty(std::string property) { - static const std::set new_api_keys{ - ov::intel_gpu::hint::queue_priority.name(), - ov::intel_gpu::hint::queue_throttle.name(), - ov::hint::inference_precision.name(), - ov::compilation_num_threads.name(), - ov::num_streams.name(), - }; - return new_api_keys.find(property) != new_api_keys.end(); -} - -std::string Config::ConvertPropertyToLegacy(const std::string& key, const std::string& value) { - if (key == PluginConfigParams::KEY_MODEL_PRIORITY) { - auto priority = ov::util::from_string(value, ov::hint::model_priority); - if (priority == ov::hint::Priority::HIGH) - return PluginConfigParams::MODEL_PRIORITY_HIGH; - else if (priority == ov::hint::Priority::MEDIUM) - return PluginConfigParams::MODEL_PRIORITY_MED; - else if (priority == ov::hint::Priority::LOW) - return PluginConfigParams::MODEL_PRIORITY_LOW; - } else if (key == GPUConfigParams::KEY_GPU_HOST_TASK_PRIORITY) { - auto priority = ov::util::from_string(value, ov::intel_gpu::hint::host_task_priority); - if (priority == ov::hint::Priority::HIGH) - return GPUConfigParams::GPU_HOST_TASK_PRIORITY_HIGH; - else if (priority == ov::hint::Priority::MEDIUM) - return GPUConfigParams::GPU_HOST_TASK_PRIORITY_MEDIUM; - else if (priority == ov::hint::Priority::LOW) - return GPUConfigParams::GPU_HOST_TASK_PRIORITY_LOW; - } - IE_THROW() << "Unsupported value for legacy key : " << key; -} - -bool Config::CanShareContextWith(const Config& other) const { - return this->throughput_streams == other.throughput_streams && - this->useProfiling == other.useProfiling && - this->dumpCustomKernels == other.dumpCustomKernels && - this->queueThrottle == other.queueThrottle && - this->queuePriority == other.queuePriority && - this->kernels_cache_dir == other.kernels_cache_dir && - this->device_id == other.device_id && - this->task_exec_config._streams == other.task_exec_config._streams && - this->task_exec_config._threadPreferredCoreType == other.task_exec_config._threadPreferredCoreType && - this->enable_loop_unrolling == other.enable_loop_unrolling; -} - -void Configs::CreateConfig(std::string device_id) { - if (configs.find(device_id) == configs.end()) { - configs.emplace(device_id, Config(device_id)); - } -} - -Config& Configs::GetConfig(std::string device_id) { - if (device_id.empty()) { - return GetDefaultDeviceConfig(); - } - if (configs.find(device_id) == configs.end()) { - IE_THROW() << "Config for device with " << device_id << " ID is not registered in GPU plugin"; - } - return configs.find(device_id)->second; -} - -Config& Configs::GetDefaultDeviceConfig() { - return GetConfig(default_device_id); -} - -IE_SUPPRESS_DEPRECATED_END - -} // namespace intel_gpu -} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index 1d33d645df3..b9d26cdafaf 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -45,32 +45,32 @@ using namespace InferenceEngine::details; namespace ov { namespace intel_gpu { -Graph::Graph(InferenceEngine::CNNNetwork& network, gpu::ClContext::Ptr context, Config config, uint16_t stream_id) +Graph::Graph(InferenceEngine::CNNNetwork& network, RemoteContextImpl::Ptr context, const ExecutionConfig& config, uint16_t stream_id) : m_context(context) , m_networkName(network.getName()) , m_config(config) , m_stream_id(stream_id) , m_state(0) { - m_program = std::make_shared(network, GetEngine(), m_config); + m_program = std::make_shared(network, get_engine(), config); if 
(m_program->m_max_batch > 1) - m_config.max_dynamic_batch = m_program->m_max_batch; + m_config.set_property(ov::intel_gpu::max_dynamic_batch(m_program->m_max_batch)); Build(); } -Graph::Graph(cldnn::BinaryInputBuffer &ib, gpu::ClContext::Ptr context, Config config, uint16_t stream_id) +Graph::Graph(cldnn::BinaryInputBuffer &ib, RemoteContextImpl::Ptr context, const ExecutionConfig& config, uint16_t stream_id) : m_context(context) , m_config(config) , m_stream_id(stream_id) , m_state(0) { - m_program = std::make_shared(GetEngine(), m_config); + m_program = std::make_shared(get_engine(), config); if (m_program->m_max_batch > 1) - m_config.max_dynamic_batch = m_program->m_max_batch; + m_config.set_property(ov::intel_gpu::max_dynamic_batch(m_program->m_max_batch)); ib >> m_program->inputLayouts; ib >> primitiveIDs; ib >> outputDims; - m_networks.emplace_back(std::make_shared(ib, GetEngine()->create_stream(), *GetEngine(), m_stream_id)); + m_networks.emplace_back(std::make_shared(ib, get_engine().create_stream(config), get_engine(), m_stream_id)); } Graph::Graph(std::shared_ptr graph, uint16_t stream_id) @@ -130,21 +130,19 @@ void Graph::Build() { } bool Graph::use_external_queue() const { - auto impl = getContextImpl(m_context); - return impl->GetExternalQueue() != nullptr; + return m_context->get_external_queue() != nullptr; } std::shared_ptr Graph::BuildNetwork(std::shared_ptr program) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::BuildNetwork"); std::shared_ptr network = nullptr; - auto impl = getContextImpl(m_context); - auto externalQueue = impl->GetExternalQueue(); + auto externalQueue = m_context->get_external_queue(); if (externalQueue) { - if (m_config.throughput_streams != 1) + if (m_config.get_property(ov::num_streams) != 1) IE_THROW(ParameterMismatch) << "Throughput streams can't be used with shared queue!\n"; - auto &engine = m_program->GetEngine(); - network = std::make_shared(program, engine.create_stream(externalQueue), m_stream_id); + auto &engine = m_program->get_engine(); + network = std::make_shared(program, engine.create_stream(m_config, externalQueue), m_stream_id); } else { network = std::make_shared(program, m_stream_id); } @@ -164,7 +162,7 @@ Graph::variable_states_map Graph::AllocateVariablesMemories() { std::vector memoryStates; memoryStates.reserve(orderedLayouts.size()); for (const auto& layout : orderedLayouts) - memoryStates.push_back(std::make_shared(GetEngine()->allocate_memory(layout, false))); + memoryStates.push_back(std::make_shared(get_engine().allocate_memory(layout, false))); states.insert({memStateInfo.first, memoryStates }); } return states; @@ -173,7 +171,7 @@ Graph::variable_states_map Graph::AllocateVariablesMemories() { std::shared_ptr Graph::GetExecGraphInfoByPrimitivesInfo(std::vector& primitives_info, bool filter_const_primitives) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::GetExecGraphInfoByPrimitivesInfo"); - if (m_config.useProfiling) { + if (m_config.get_property(ov::enable_profiling)) { try { // Update may throw an exception for step-by-step runtime graph dump, // since network->get_executed_primitives() method can't be called before network execution diff --git a/src/plugins/intel_gpu/src/plugin/infer_request.cpp b/src/plugins/intel_gpu/src/plugin/infer_request.cpp index 512edd59fea..ad641eb360c 100644 --- a/src/plugins/intel_gpu/src/plugin/infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/infer_request.cpp @@ -10,6 +10,7 @@ #include #include "intel_gpu/plugin/infer_request.hpp" #include 
"intel_gpu/plugin/remote_context.hpp" +#include "intel_gpu/plugin/remote_allocators.hpp" #include "intel_gpu/plugin/compiled_model.hpp" #include "intel_gpu/runtime/itt.hpp" #include "intel_gpu/plugin/variable_state.hpp" @@ -390,18 +391,22 @@ void InferRequest::SetGraph(std::shared_ptr graph) { } InferRequest::InferRequest(InputsDataMap networkInputs, OutputsDataMap networkOutputs, - const CompiledModel::Ptr& execNetwork) + const CompiledModel::Ptr& execNetwork) : IInferRequestInternal(networkInputs, networkOutputs) { IE_ASSERT(nullptr != execNetwork); streamExecutor = dynamic_cast(execNetwork->m_taskExecutor.get()); + m_context = std::dynamic_pointer_cast(execNetwork->GetContext()); + OPENVINO_ASSERT(m_context != nullptr, "[GPU] Can't initialize context of InferRequest: wrong context type"); } InferRequest::InferRequest(const std::vector>& inputs, - const std::vector>& outputs, - const CompiledModel::Ptr& execNetwork) + const std::vector>& outputs, + const CompiledModel::Ptr& execNetwork) : IInferRequestInternal(inputs, outputs) { IE_ASSERT(nullptr != execNetwork); streamExecutor = dynamic_cast(execNetwork->m_taskExecutor.get()); + m_context = std::dynamic_pointer_cast(execNetwork->GetContext()); + OPENVINO_ASSERT(m_context != nullptr, "[GPU] Can't initialize context of InferRequest: wrong context type"); } // ----------------------------------------------------------------------------------------- // @@ -450,7 +455,7 @@ void InferRequest::enqueue() { FormatFromTensorDesc(blobsDesc), tensor_from_dims(blobsDesc.getDims())); - auto mergedBlobs = create_remote_blob(blobsDesc, layout, RemoteBlobImpl::BlobType::BT_BUF_INTERNAL); + auto mergedBlobs = create_remote_blob(blobsDesc, layout, BlobType::BT_BUF_INTERNAL); dst = mergedBlobs->buffer().as(); _inputs[name] = mergedBlobs; @@ -591,8 +596,8 @@ Blob::Ptr InferRequest::create_host_blob(const TensorDesc& desc, bool is_dynamic OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::create_host_blob"); // Disable USM usage as USMHostAllocator may fail for attempt to allocate 0 bytes // If we add WA for such case to avoid driver call, then deallocate method will return false and Blob::setShape call will throw an exception - bool use_usm = m_graph->GetEngine()->use_unified_shared_memory() && !is_dynamic; - auto alloc = use_usm ? std::make_shared(m_graph->GetContext().get()) : CreateDefaultAllocator(); + bool use_usm = m_graph->get_engine().use_unified_shared_memory() && !is_dynamic; + auto alloc = use_usm ? 
std::make_shared(m_context) : CreateDefaultAllocator(); auto blob = make_blob_with_precision(desc, alloc); blob->allocate(); return blob; @@ -600,27 +605,27 @@ Blob::Ptr InferRequest::create_host_blob(const TensorDesc& desc, bool is_dynamic template InferenceEngine::Blob::Ptr InferRequest::create_remote_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout, - const RemoteBlobImpl::BlobType mem_type, void* mem_ptr) { - auto blob = std::make_shared(m_graph->GetContext(), - m_graph->GetNetwork()->get_stream(), - desc, - layout, - mem_ptr, - 0, - 0, - mem_type); + const BlobType mem_type, void* mem_ptr) { + auto blob = std::make_shared(m_context, + m_graph->GetNetwork()->get_stream(), + desc, + layout, + mem_ptr, + 0, + 0, + mem_type); OPENVINO_ASSERT(blob, "[GPU] Failed to allocate remote blob"); blob->allocate(); return blob; } template InferenceEngine::Blob::Ptr InferRequest::create_remote_blob(const InferenceEngine::TensorDesc&, const cldnn::layout&, - const RemoteBlobImpl::BlobType, void*); + const BlobType, void*); template InferenceEngine::Blob::Ptr InferRequest::create_remote_blob(const InferenceEngine::TensorDesc&, const cldnn::layout&, - const RemoteBlobImpl::BlobType, void*); + const BlobType, void*); Blob::Ptr InferRequest::create_shared_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout, void* usm_host_mem) { - auto blob = create_remote_blob(desc, layout, RemoteBlobImpl::BlobType::BT_USM_SHARED, usm_host_mem); + auto blob = create_remote_blob(desc, layout, BlobType::BT_USM_SHARED, usm_host_mem); OPENVINO_ASSERT(blob, "[GPU] Failed to allocate shared host <-> device blob"); return blob; } @@ -771,7 +776,7 @@ void InferRequest::allocate_inputs() { _inputs[name] = create_host_blob(desc, input_layout.is_dynamic()); // Pre-allocate device input only if USM is not supported; in other case it will be allocated // in prepare_input() function later - if (input_layout.is_static() && !m_graph->GetEngine()->use_unified_shared_memory()) { + if (input_layout.is_static() && !m_graph->get_engine().use_unified_shared_memory()) { _deviceInputs[name] = create_device_blob(desc); } } @@ -813,7 +818,7 @@ void InferRequest::allocate_outputs() { _outputs[no.first] = create_host_blob(desc, output_layout.is_dynamic()); // Pre-allocate device output only if USM is not supported; in other case it will be allocated // in prepare_output() function later - if (output_layout.is_static() && !m_graph->GetEngine()->use_unified_shared_memory()) { + if (output_layout.is_static() && !m_graph->get_engine().use_unified_shared_memory()) { _deviceOutputs[no.first] = create_device_blob(desc); } } @@ -840,7 +845,7 @@ std::map InferRequest::GetPerformanceCo void InferRequest::allocate_dev_mem_if_needed(InferenceEngine::BlobMap& device_mems, InferenceEngine::Blob::Ptr& user_blob, const cldnn::primitive_id& blob_name, const cldnn::layout& layout, bool need_lockable_mem) { const auto input_ptr = static_cast(user_blob->cbuffer()); - const auto alloc_type = m_graph->GetEngine()->detect_usm_allocation_type(input_ptr); + const auto alloc_type = m_graph->get_engine().detect_usm_allocation_type(input_ptr); const auto is_usm_host = alloc_type == cldnn::allocation_type::usm_host; const auto has_device_blob = device_mems.find(blob_name) != device_mems.end(); bool can_skip_allocation = false; @@ -851,7 +856,7 @@ void InferRequest::allocate_dev_mem_if_needed(InferenceEngine::BlobMap& device_m OPENVINO_ASSERT(impl, str_device_output_unsupported_blob); OPENVINO_ASSERT(impl->is_allocated(), 
str_input_not_allocated); - auto impl_mem = impl->getMemory(); + auto impl_mem = impl->get_memory(); auto src_ptr = user_blob->cbuffer().as(); // If device mem already exists, we can reuse blob if buffer has usm_host type and points to the same memory, // so we don't need to allocate new memory @@ -875,7 +880,7 @@ void InferRequest::allocate_dev_mem_if_needed(InferenceEngine::BlobMap& device_m device_mems[blob_name] = create_shared_device_blob(user_blob->getTensorDesc(), layout, user_blob->buffer().as()); } else if (need_lockable_mem) { device_mems[blob_name] = - create_remote_blob(user_blob->getTensorDesc(), layout, RemoteBlobImpl::BlobType::BT_USM_HOST_INTERNAL); + create_remote_blob(user_blob->getTensorDesc(), layout, BlobType::BT_USM_HOST_INTERNAL); } else { device_mems[blob_name] = create_device_blob(user_blob->getTensorDesc()); } @@ -894,7 +899,7 @@ void InferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr auto remote_ptr = inputBlob->as(); auto& stream = m_graph->GetNetwork()->get_stream(); const bool is_dev_input = remote_ptr != nullptr; - const bool can_use_usm = m_graph->GetEngine()->use_unified_shared_memory(); + const bool can_use_usm = m_graph->get_engine().use_unified_shared_memory(); auto conv_to_supported_prec = [](Precision::ePrecision prec) { switch (prec) { @@ -951,7 +956,7 @@ void InferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr if (!impl->is_allocated()) { IE_THROW() << str_input_not_allocated; } - auto inputMem = impl->getMemory(); + auto inputMem = impl->get_memory(); auto input_layout = m_graph->GetInputLayouts().find(inputName); if (input_layout != m_graph->GetInputLayouts().end()) { @@ -1003,7 +1008,7 @@ void InferRequest::prepare_output(const cldnn::primitive_id& outputName, Blob::P const auto output_id = outputsMap.at(outputName); const auto output_layout = m_graph->GetNetwork()->get_node_output_layout(output_id); const bool is_static = output_layout.is_static(); - const bool can_use_usm = m_graph->GetEngine()->use_unified_shared_memory(); + const bool can_use_usm = m_graph->get_engine().use_unified_shared_memory(); auto remote_ptr = outputBlob->as(); const bool is_dev_input = remote_ptr != nullptr; @@ -1027,7 +1032,7 @@ void InferRequest::prepare_output(const cldnn::primitive_id& outputName, Blob::P if (!impl->is_allocated()) { IE_THROW(NotAllocated) << str_output_not_allocated; } - auto outputMem = impl->getMemory(); + auto outputMem = impl->get_memory(); _nw_ptr->set_output_memory(internalName, outputMem); } @@ -1038,10 +1043,10 @@ InferenceEngine::Blob::Ptr InferRequest::create_device_blob(const InferenceEngin auto l = cldnn::layout(shape, dt, format); - if (m_graph->GetEngine()->use_unified_shared_memory()) { - return create_remote_blob(desc, l, RemoteBlobImpl::BlobType::BT_USM_DEVICE_INTERNAL); + if (m_graph->get_engine().use_unified_shared_memory()) { + return create_remote_blob(desc, l, BlobType::BT_USM_DEVICE_INTERNAL); } else { - return create_remote_blob(desc, l, RemoteBlobImpl::BlobType::BT_BUF_INTERNAL); + return create_remote_blob(desc, l, BlobType::BT_BUF_INTERNAL); } } @@ -1049,7 +1054,7 @@ std::vector> InferReque std::vector> ret{}; ret.reserve(variables_states_.size()); for (const auto& pair : variables_states_) - ret.push_back(std::make_shared(pair.first, pair.second, m_graph->GetEngine(), m_curBatch)); + ret.push_back(std::make_shared(pair.first, pair.second, m_graph->get_engine(), m_curBatch)); return ret; } diff --git a/src/plugins/intel_gpu/src/plugin/infer_request_legacy.cpp 
b/src/plugins/intel_gpu/src/plugin/infer_request_legacy.cpp index c0473fbfbf5..89c69affb9e 100644 --- a/src/plugins/intel_gpu/src/plugin/infer_request_legacy.cpp +++ b/src/plugins/intel_gpu/src/plugin/infer_request_legacy.cpp @@ -10,6 +10,8 @@ #include #include "intel_gpu/plugin/infer_request_legacy.hpp" #include "intel_gpu/plugin/remote_context.hpp" +#include "intel_gpu/plugin/remote_blob.hpp" +#include "intel_gpu/plugin/remote_allocators.hpp" #include "intel_gpu/plugin/compiled_model.hpp" #include "intel_gpu/runtime/itt.hpp" #include "intel_gpu/plugin/variable_state.hpp" @@ -289,7 +291,7 @@ void InferRequestLegacy::SetBlob(const std::string& name, const Blob::Ptr& data) bool is_nv12 = nv12_ptr != nullptr; int expected_batch = is_batched ? desc.getDims()[0] : 1; if (ColorFormat::NV12 == foundInput->getPreProcess().getColorFormat() && - m_graph->getConfig().nv12_two_inputs) { + m_graph->get_config().get_property(ov::intel_gpu::nv12_two_inputs)) { // try extracting Y and UV remote blobs from it // and put them into appropriate network inputs // that should then go into biplanar NV12 reorder @@ -500,7 +502,7 @@ void InferRequestLegacy::checkBlobs() { auto node = findInputByNodeName(input.first); bool is_dynamic = (node && node->get_output_partial_shape(0).is_dynamic()); if (!is_dynamic) - checkInputBlob(input.second, input.first, foundInput, m_graph->getConfig().nv12_two_inputs); + checkInputBlob(input.second, input.first, foundInput, m_graph->get_config().get_property(ov::intel_gpu::nv12_two_inputs)); } for (auto const &output : _outputs) { DataPtr foundOutput = nullptr; @@ -619,6 +621,8 @@ InferRequestLegacy::InferRequestLegacy(InputsDataMap networkInputs, OutputsDataM : IInferRequestInternal(networkInputs, networkOutputs) { IE_ASSERT(nullptr != execNetwork); streamExecutor = dynamic_cast(execNetwork->m_taskExecutor.get()); + m_context = std::dynamic_pointer_cast(execNetwork->GetContext()); + OPENVINO_ASSERT(m_context != nullptr, "[GPU] Can't initialize context of InferRequestLegacy: wrong context type"); } InferRequestLegacy::InferRequestLegacy(const std::vector>& inputs, @@ -627,6 +631,8 @@ InferRequestLegacy::InferRequestLegacy(const std::vector(execNetwork->m_taskExecutor.get()); + m_context = std::dynamic_pointer_cast(execNetwork->GetContext()); + OPENVINO_ASSERT(m_context != nullptr, "[GPU] Can't initialize context of InferRequestLegacy: wrong context type"); } // ----------------------------------------------------------------------------------------- // @@ -698,7 +704,7 @@ void InferRequestLegacy::enqueue() { FormatFromTensorDesc(blobsDesc), tensor_from_dims(blobsDesc.getDims())); - auto mergedBlobs = std::make_shared(m_graph->GetContext(), + auto mergedBlobs = std::make_shared(m_context, m_graph->GetNetwork()->get_stream(), blobsDesc, layout); @@ -914,14 +920,14 @@ Blob::Ptr InferRequestLegacy::create_host_blob(const TensorDesc& desc, std::shar } Blob::Ptr InferRequestLegacy::create_shared_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout, void* usm_host_mem) { - auto blob = std::make_shared(m_graph->GetContext(), + auto blob = std::make_shared(m_context, m_graph->GetNetwork()->get_stream(), desc, layout, usm_host_mem, 0, 0, - RemoteBlobImpl::BlobType::BT_USM_SHARED); + BlobType::BT_USM_SHARED); if (!blob) IE_THROW(NotAllocated) << "Failed to allocate shared host <-> device blob"; blob->allocate(); @@ -1009,7 +1015,7 @@ void InferRequestLegacy::allocate_inputs() { const TensorDesc& desc = ni.second->getTensorDesc(); bool is_nv12_input = 
ColorFormat::NV12 == ni.second->getPreProcess().getColorFormat() && - m_graph->getConfig().nv12_two_inputs; + m_graph->get_config().get_property(ov::intel_gpu::nv12_two_inputs); auto parameter = std::find_if(_parameters.begin(), _parameters.end(), [&](const std::shared_ptr& node) { return node->get_friendly_name() == name; @@ -1040,10 +1046,10 @@ void InferRequestLegacy::allocate_inputs() { Blob::Ptr inputBlob = create_host_blob(desc); _inputs[name] = inputBlob; } else { - if (m_graph->GetEngine()->use_unified_shared_memory()) { + if (m_graph->get_engine().use_unified_shared_memory()) { // For USM case we create host blob using custom USM host allocator // and then create shared device blob on top of this buffer - auto host_blob = create_host_blob(desc, std::make_shared(m_graph->GetContext().get())); + auto host_blob = create_host_blob(desc, std::make_shared(m_context)); _inputs[name] = host_blob; _deviceInputs[name] = create_shared_device_blob(desc, litr->second, host_blob->buffer().as()); } else { @@ -1103,10 +1109,10 @@ void InferRequestLegacy::allocate_outputs() { auto device_blob = create_device_blob(device_blob_desc, output_layout); _deviceOutputs[no.first] = device_blob; } else { - if (m_graph->GetEngine()->use_unified_shared_memory()) { + if (m_graph->get_engine().use_unified_shared_memory()) { // For USM case we create host blob using custom USM host allocator // and then create shared device blob on top of this buffer - auto host_blob = create_host_blob(desc, std::make_shared(m_graph->GetContext().get())); + auto host_blob = create_host_blob(desc, std::make_shared(m_context)); _outputs[no.first] = host_blob; _deviceOutputs[no.first] = create_shared_device_blob(desc, output_layout, host_blob->buffer().as()); } else { @@ -1183,7 +1189,7 @@ void InferRequestLegacy::prepare_input(const cldnn::primitive_id& inputName, Blo if (!impl->is_allocated()) { IE_THROW() << str_input_not_allocated; } - auto inputMem = impl->getMemory(); + auto inputMem = impl->get_memory(); auto input_layout = m_graph->GetInputLayouts().find(inputName); if (input_layout != m_graph->GetInputLayouts().end()) { @@ -1241,25 +1247,25 @@ void InferRequestLegacy::prepare_output(const cldnn::primitive_id& outputName, B if (!impl->is_allocated()) { IE_THROW(NotAllocated) << str_output_not_allocated; } - auto outputMem = impl->getMemory(); + auto outputMem = impl->get_memory(); _nw_ptr->set_output_memory(internalName, outputMem); } InferenceEngine::Blob::Ptr InferRequestLegacy::create_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout) { - if (m_graph->GetEngine()->use_unified_shared_memory()) { - auto blobPtr = std::make_shared(m_graph->GetContext(), + if (m_graph->get_engine().use_unified_shared_memory()) { + auto blobPtr = std::make_shared(m_context, m_graph->GetNetwork()->get_stream(), desc, layout, nullptr, 0, 0, - RemoteBlobImpl::BlobType::BT_USM_HOST_INTERNAL); + BlobType::BT_USM_HOST_INTERNAL); getBlobImpl(blobPtr.get())->allocate(); checkAlloc(blobPtr, str_device_mem_not_allocated); return blobPtr; } else { - auto blobPtr = std::make_shared(m_graph->GetContext(), + auto blobPtr = std::make_shared(m_context, m_graph->GetNetwork()->get_stream(), desc, layout); @@ -1273,7 +1279,7 @@ std::vector> InferReque std::vector> ret{}; ret.reserve(variables_states_.size()); for (const auto& pair : variables_states_) - ret.push_back(std::make_shared(pair.first, pair.second, m_graph->GetEngine(), m_curBatch)); + ret.push_back(std::make_shared(pair.first, pair.second, m_graph->get_engine(), 
m_curBatch)); return ret; } diff --git a/src/plugins/intel_gpu/src/plugin/legacy_api_helper.cpp b/src/plugins/intel_gpu/src/plugin/legacy_api_helper.cpp new file mode 100644 index 00000000000..938dcebb857 --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/legacy_api_helper.cpp @@ -0,0 +1,272 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "intel_gpu/plugin/legacy_api_helper.hpp" +#include "ie_plugin_config.hpp" +#include "gpu/gpu_config.hpp" + +namespace ov { +namespace intel_gpu { + +bool LegacyAPIHelper::is_new_api_property(const std::pair& property) { + static const std::vector new_properties_list = { + ov::intel_gpu::hint::queue_priority.name(), + ov::intel_gpu::hint::queue_throttle.name(), + ov::hint::inference_precision.name(), + ov::compilation_num_threads.name(), + ov::num_streams.name(), + }; + + return std::find(new_properties_list.begin(), new_properties_list.end(), property.first) != new_properties_list.end(); +} + +bool LegacyAPIHelper::is_legacy_property(const std::pair& property, bool is_new_api) { + static const std::vector legacy_properties_list = { + InferenceEngine::PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS, + InferenceEngine::GPUConfigParams::KEY_GPU_MAX_NUM_THREADS, + InferenceEngine::GPUConfigParams::KEY_GPU_PLUGIN_PRIORITY, + InferenceEngine::GPUConfigParams::KEY_GPU_PLUGIN_THROTTLE, + }; + + static const std::vector legacy_property_values_list = { + InferenceEngine::PluginConfigParams::KEY_MODEL_PRIORITY, + InferenceEngine::GPUConfigParams::KEY_GPU_HOST_TASK_PRIORITY, + }; + + bool legacy_property = std::find(legacy_properties_list.begin(), legacy_properties_list.end(), property.first) != legacy_properties_list.end(); + bool need_value_conversion = !is_new_api && + std::find(legacy_property_values_list.begin(), legacy_property_values_list.end(), property.first) != legacy_property_values_list.end(); + + return legacy_property || need_value_conversion; +} + +ov::AnyMap LegacyAPIHelper::convert_legacy_properties(const std::map& properties, bool is_new_api) { + return convert_legacy_properties(ov::AnyMap(properties.begin(), properties.end()), is_new_api); +} + +ov::AnyMap LegacyAPIHelper::convert_legacy_properties(const ov::AnyMap& properties, bool is_new_api) { + ov::AnyMap converted_properties; + for (auto& property : properties) { + if (is_legacy_property(property, is_new_api)) { + auto new_property = convert_legacy_property(property); + converted_properties[new_property.first] = new_property.second; + } else { + converted_properties[property.first] = property.second; + } + } + + return converted_properties; +} + +std::pair LegacyAPIHelper::convert_legacy_property(const std::pair& legacy_property) { + auto legacy_name = legacy_property.first; + if (legacy_name == InferenceEngine::PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS) { + ov::Any converted_val{legacy_property.second}; + auto legacy_val = legacy_property.second.as(); + if (legacy_val == InferenceEngine::PluginConfigParams::GPU_THROUGHPUT_AUTO) + converted_val = ov::streams::AUTO; + + return { ov::num_streams.name(), converted_val }; + } else if (legacy_name == InferenceEngine::PluginConfigParams::KEY_MODEL_PRIORITY) { + ov::Any converted_val{nullptr}; + auto legacy_val = legacy_property.second.as(); + if (legacy_val == InferenceEngine::PluginConfigParams::MODEL_PRIORITY_HIGH) { + converted_val = ov::hint::Priority::HIGH; + } else if (legacy_val == InferenceEngine::PluginConfigParams::MODEL_PRIORITY_MED) { + converted_val = ov::hint::Priority::MEDIUM; 
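
For reference, a hedged sketch of how these helpers combine at a call site, using only the signatures visible in this file; the element types of the std::map and std::pair arguments (std::string keys, ov::Any values) are assumed from the conversions performed above, and the property values are illustrative only.

    // Hypothetical call site for LegacyAPIHelper; not part of the patch.
    #include <map>
    #include <string>
    #include "intel_gpu/plugin/legacy_api_helper.hpp"
    #include "ie_plugin_config.hpp"
    #include "openvino/runtime/intel_gpu/properties.hpp"

    ov::AnyMap normalize_user_config() {
        using namespace InferenceEngine;
        std::map<std::string, std::string> user_config = {
            { PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS, PluginConfigParams::GPU_THROUGHPUT_AUTO },
            { PluginConfigParams::KEY_MODEL_PRIORITY, PluginConfigParams::MODEL_PRIORITY_HIGH },
        };

        // Forward direction: legacy keys/values become their ov:: counterparts
        // (ov::num_streams = AUTO, ov::hint::model_priority = HIGH) in a single pass.
        auto converted = ov::intel_gpu::LegacyAPIHelper::convert_legacy_properties(user_config, /*is_new_api=*/false);

        // Reverse direction: used by CompiledModel::GetConfig() above to answer a
        // legacy query from the new-style stored value.
        auto legacy = ov::intel_gpu::LegacyAPIHelper::convert_to_legacy_property(
            { ov::hint::model_priority.name(), ov::Any(ov::hint::Priority::HIGH) });
        // legacy.first is KEY_MODEL_PRIORITY, legacy.second holds MODEL_PRIORITY_HIGH.

        return converted;
    }
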
+ } else if (legacy_val == InferenceEngine::PluginConfigParams::MODEL_PRIORITY_LOW) { + converted_val = ov::hint::Priority::LOW; + } else { + converted_val = legacy_val; + } + + return { ov::hint::model_priority.name(), converted_val }; + } else if (legacy_name == InferenceEngine::GPUConfigParams::KEY_GPU_MAX_NUM_THREADS) { + return { ov::compilation_num_threads.name(), legacy_property.second }; + } else if (legacy_name == InferenceEngine::GPUConfigParams::KEY_GPU_HOST_TASK_PRIORITY) { + ov::Any converted_val{nullptr}; + auto legacy_val = legacy_property.second.as(); + if (legacy_val == InferenceEngine::GPUConfigParams::GPU_HOST_TASK_PRIORITY_HIGH) { + converted_val = ov::hint::Priority::HIGH; + } else if (legacy_val == InferenceEngine::GPUConfigParams::GPU_HOST_TASK_PRIORITY_MEDIUM) { + converted_val = ov::hint::Priority::MEDIUM; + } else if (legacy_val == InferenceEngine::GPUConfigParams::GPU_HOST_TASK_PRIORITY_LOW) { + converted_val = ov::hint::Priority::LOW; + } else { + converted_val = legacy_val; + } + return { ov::intel_gpu::hint::host_task_priority.name(), converted_val }; + } else if (legacy_name == InferenceEngine::GPUConfigParams::KEY_GPU_PLUGIN_PRIORITY) { + ov::Any converted_val{nullptr}; + auto legacy_val = legacy_property.second.as(); + if (!legacy_val.empty()) { + std::stringstream ss(legacy_val); + uint32_t uVal(0); + ss >> uVal; + OPENVINO_ASSERT(!ss.fail(), "[GPU] Unsupported property value by plugin: ", legacy_val); + switch (uVal) { + case 0: + case 2: + converted_val = ov::hint::Priority::MEDIUM; + break; + case 1: + converted_val = ov::hint::Priority::LOW; + break; + case 3: + converted_val = ov::hint::Priority::HIGH; + break; + default: + OPENVINO_ASSERT(false, "[GPU] Unsupported queue priority value ", uVal); + } + } + + return { ov::intel_gpu::hint::queue_priority.name(), converted_val }; + } else if (legacy_name == InferenceEngine::GPUConfigParams::KEY_GPU_PLUGIN_THROTTLE) { + ov::Any converted_val{nullptr}; + auto legacy_val = legacy_property.second.as(); + if (!legacy_val.empty()) { + std::stringstream ss(legacy_val); + uint32_t uVal(0); + ss >> uVal; + OPENVINO_ASSERT(!ss.fail(), "[GPU] Unsupported property value by plugin: ", legacy_val); + switch (uVal) { + case 0: + case 2: + converted_val = ov::intel_gpu::hint::ThrottleLevel::MEDIUM; + break; + case 1: + converted_val = ov::intel_gpu::hint::ThrottleLevel::LOW; + break; + case 3: + converted_val = ov::intel_gpu::hint::ThrottleLevel::HIGH; + break; + default: + OPENVINO_ASSERT(false, "[GPU] Unsupported queue throttle value ", uVal); + } + } + + return { ov::intel_gpu::hint::queue_throttle.name(), converted_val }; + } + + OPENVINO_ASSERT(false, "[GPU] Unhandled legacy property in convert_legacy_property method: ", legacy_property.first); +} + +std::pair LegacyAPIHelper::convert_to_legacy_property(const std::pair& property) { + auto name = property.first; + if (name == ov::num_streams.name()) { + ov::Any legacy_val{property.second}; + if (!property.second.empty()) { + if (property.second.as() == ov::streams::AUTO) { + legacy_val = InferenceEngine::PluginConfigParams::GPU_THROUGHPUT_AUTO; + } + } + + return { InferenceEngine::PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS, legacy_val }; + } else if (name == ov::hint::model_priority.name()) { + ov::Any legacy_val{nullptr}; + if (!property.second.empty()) { + ov::hint::Priority val = property.second.as(); + switch (val) { + case ov::hint::Priority::LOW: legacy_val = InferenceEngine::PluginConfigParams::MODEL_PRIORITY_LOW; break; + case ov::hint::Priority::MEDIUM: 
legacy_val = InferenceEngine::PluginConfigParams::MODEL_PRIORITY_MED; break; + case ov::hint::Priority::HIGH: legacy_val = InferenceEngine::PluginConfigParams::MODEL_PRIORITY_HIGH; break; + default: OPENVINO_ASSERT(false, "[GPU] Unsupported model priority value ", val); + } + } + + return { InferenceEngine::PluginConfigParams::KEY_MODEL_PRIORITY, legacy_val }; + } else if (name == ov::compilation_num_threads.name()) { + return { InferenceEngine::GPUConfigParams::KEY_GPU_MAX_NUM_THREADS, property.second }; + } else if (name == ov::intel_gpu::hint::host_task_priority.name()) { + ov::Any legacy_val{nullptr}; + if (!property.second.empty()) { + ov::hint::Priority val = property.second.as(); + switch (val) { + case ov::hint::Priority::LOW: legacy_val = InferenceEngine::GPUConfigParams::GPU_HOST_TASK_PRIORITY_LOW; break; + case ov::hint::Priority::MEDIUM: legacy_val = InferenceEngine::GPUConfigParams::GPU_HOST_TASK_PRIORITY_MEDIUM; break; + case ov::hint::Priority::HIGH: legacy_val = InferenceEngine::GPUConfigParams::GPU_HOST_TASK_PRIORITY_HIGH; break; + default: OPENVINO_ASSERT(false, "[GPU] Unsupported host task priority value ", val); + } + } + + return { InferenceEngine::PluginConfigParams::KEY_MODEL_PRIORITY, legacy_val }; + } else if (name == ov::intel_gpu::hint::queue_priority.name()) { + ov::Any legacy_val{nullptr}; + if (!property.second.empty()) { + ov::hint::Priority val = property.second.as(); + switch (val) { + case ov::hint::Priority::LOW: legacy_val = "1"; break; + case ov::hint::Priority::MEDIUM: legacy_val = "2"; break; + case ov::hint::Priority::HIGH: legacy_val = "3"; break; + default: OPENVINO_ASSERT(false, "[GPU] Unsupported queue throttle value ", val); + } + } + + return { InferenceEngine::GPUConfigParams::KEY_GPU_PLUGIN_PRIORITY, legacy_val }; + } else if (name == ov::intel_gpu::hint::queue_throttle.name()) { + ov::Any legacy_val{nullptr}; + if (!property.second.empty()) { + ov::intel_gpu::hint::ThrottleLevel val = property.second.as(); + switch (val) { + case ov::intel_gpu::hint::ThrottleLevel::LOW: legacy_val = "1"; break; + case ov::intel_gpu::hint::ThrottleLevel::MEDIUM: legacy_val = "2"; break; + case ov::intel_gpu::hint::ThrottleLevel::HIGH: legacy_val = "3"; break; + default: OPENVINO_ASSERT(false, "[GPU] Unsupported queue throttle value ", val); + } + } + return { InferenceEngine::GPUConfigParams::KEY_GPU_PLUGIN_THROTTLE, legacy_val }; + } + + OPENVINO_ASSERT(false, "[GPU] Unhandled legacy property in convert_to_legacy_property method: ", property.first); +} + +std::vector LegacyAPIHelper::get_supported_configs() { + static const std::vector supported_config = { + CONFIG_KEY(MODEL_PRIORITY), + CONFIG_KEY(PERFORMANCE_HINT), + CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS), + CONFIG_KEY(PERF_COUNT), + CONFIG_KEY(DYN_BATCH_ENABLED), + CONFIG_KEY(CONFIG_FILE), + CONFIG_KEY(DEVICE_ID), + CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), + CONFIG_KEY(CACHE_DIR), + CONFIG_KEY(GPU_THROUGHPUT_STREAMS), + GPU_CONFIG_KEY(PLUGIN_PRIORITY), + GPU_CONFIG_KEY(PLUGIN_THROTTLE), + GPU_CONFIG_KEY(HOST_TASK_PRIORITY), + GPU_CONFIG_KEY(NV12_TWO_INPUTS), + GPU_CONFIG_KEY(MAX_NUM_THREADS), + GPU_CONFIG_KEY(ENABLE_LOOP_UNROLLING), + }; + + return supported_config; +} + +std::vector LegacyAPIHelper::get_supported_metrics(bool model_caching_enabled) { + std::vector supported_metrics = { + METRIC_KEY(AVAILABLE_DEVICES), + METRIC_KEY(SUPPORTED_METRICS), + METRIC_KEY(FULL_DEVICE_NAME), + METRIC_KEY(OPTIMIZATION_CAPABILITIES), + METRIC_KEY(SUPPORTED_CONFIG_KEYS), + METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS), 
+ METRIC_KEY(RANGE_FOR_STREAMS), + METRIC_KEY(DEVICE_TYPE), + METRIC_KEY(DEVICE_GOPS), + METRIC_KEY(OPTIMAL_BATCH_SIZE), + METRIC_KEY(MAX_BATCH_SIZE), + GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE), + GPU_METRIC_KEY(UARCH_VERSION), + GPU_METRIC_KEY(EXECUTION_UNITS_COUNT), + GPU_METRIC_KEY(MEMORY_STATISTICS), + }; + if (model_caching_enabled) + supported_metrics.push_back(METRIC_KEY(IMPORT_EXPORT_SUPPORT)); + + return supported_metrics; +} + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/ops/adaptive_pooling.cpp b/src/plugins/intel_gpu/src/plugin/ops/adaptive_pooling.cpp index 895a69036ab..b9456b4c109 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/adaptive_pooling.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/adaptive_pooling.cpp @@ -38,7 +38,7 @@ static void CreateAdaptiveMaxPoolOp(Program& p, const std::shared_ptr lock{mem, stream}; auto buf = lock.data(); auto bufSize = constLayout.bytes_count(); diff --git a/src/plugins/intel_gpu/src/plugin/ops/convolution.cpp b/src/plugins/intel_gpu/src/plugin/ops/convolution.cpp index c49faa12cec..a69315d47af 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/convolution.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/convolution.cpp @@ -314,7 +314,7 @@ static void DeformableConvolutionImpl(Program& p, std::vector weights = {inputs[2].pid}; // Remove weights from inputs inputs.erase(inputs.begin() + 2); - auto device_info = p.GetEngine().get_device_info(); + auto device_info = p.get_engine().get_device_info(); bool supports_subgroups = device_info.supports_khr_subgroups || device_info.supports_intel_subgroups; if (groups == 1 && supports_subgroups) { std::string defConvLayerNameInterp = layerName + "_interp"; diff --git a/src/plugins/intel_gpu/src/plugin/ops/ctc_greedy_decoder.cpp b/src/plugins/intel_gpu/src/plugin/ops/ctc_greedy_decoder.cpp index 21fc053f7e4..1220e7bb287 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/ctc_greedy_decoder.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/ctc_greedy_decoder.cpp @@ -74,7 +74,7 @@ static void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptrget_output_shape(1))); GPU_DEBUG_LOG << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl; - shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayout)); + shared_memory.emplace_back(p.get_engine().allocate_memory(mutableLayout)); cldnn::primitive_id ctc_gd_mutable_id_w = layer_type_name_ID(op) + "_md_write"; auto ctc_gd_mutable_prim = cldnn::mutable_data(ctc_gd_mutable_id_w, diff --git a/src/plugins/intel_gpu/src/plugin/ops/experimental_detectron_detection_output.cpp b/src/plugins/intel_gpu/src/plugin/ops/experimental_detectron_detection_output.cpp index 9d0194b36ff..e8d96af7d2b 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/experimental_detectron_detection_output.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/experimental_detectron_detection_output.cpp @@ -33,7 +33,7 @@ static void CreateExperimentalDetectronDetectionOutputOp( const cldnn::layout mutable_layout1{cldnn::element_type_to_data_type(mutable_precision1), cldnn::format::get_default_format(output_shape1.size()), tensor_from_dims(output_shape1)}; - cldnn::memory::ptr shared_memory1{p.GetEngine().allocate_memory(mutable_layout1)}; + cldnn::memory::ptr shared_memory1{p.get_engine().allocate_memory(mutable_layout1)}; const auto mutable_id_w1 = layer_type_name + "_md_write.1"; const cldnn::mutable_data mutable_prim_w{mutable_id_w1, shared_memory1}; @@ -45,7 +45,7 @@ static void CreateExperimentalDetectronDetectionOutputOp( const 
cldnn::layout mutable_layout2{cldnn::element_type_to_data_type(mutable_precision2), cldnn::format::get_default_format(output_shape2.size()), tensor_from_dims(output_shape2)}; - cldnn::memory::ptr shared_memory2{p.GetEngine().allocate_memory(mutable_layout2)}; + cldnn::memory::ptr shared_memory2{p.get_engine().allocate_memory(mutable_layout2)}; const auto mutable_id_w2 = layer_type_name + "_md_write.2"; const cldnn::mutable_data mutable_prim_w2{mutable_id_w2, shared_memory2}; diff --git a/src/plugins/intel_gpu/src/plugin/ops/experimental_detectron_generate_proposals_single_image.cpp b/src/plugins/intel_gpu/src/plugin/ops/experimental_detectron_generate_proposals_single_image.cpp index 4a1bf51eaa7..18fdc1a9d9a 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/experimental_detectron_generate_proposals_single_image.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/experimental_detectron_generate_proposals_single_image.cpp @@ -33,7 +33,7 @@ static void CreateExperimentalDetectronGenerateProposalsSingleImageOp( const cldnn::layout mutable_layout{cldnn::element_type_to_data_type(mutable_precision), cldnn::format::get_default_format(output_shape.size()), tensor_from_dims(output_shape)}; - cldnn::memory::ptr shared_memory{p.GetEngine().allocate_memory(mutable_layout)}; + cldnn::memory::ptr shared_memory{p.get_engine().allocate_memory(mutable_layout)}; const auto mutable_id_w = layer_type_name + "_md_write"; const cldnn::mutable_data mutable_prim_w{mutable_id_w, shared_memory}; diff --git a/src/plugins/intel_gpu/src/plugin/ops/experimental_detectron_roi_feature_extractor.cpp b/src/plugins/intel_gpu/src/plugin/ops/experimental_detectron_roi_feature_extractor.cpp index d3efa12cc8d..13f3a92bf28 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/experimental_detectron_roi_feature_extractor.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/experimental_detectron_roi_feature_extractor.cpp @@ -22,7 +22,7 @@ static void CreateExperimentalDetectronROIFeatureExtractorOp(Program& p, const s cldnn::format::get_default_format(op->get_output_shape(1).size()), tensor_from_dims(op->get_output_shape(1))); - cldnn::memory::ptr shared_memory {p.GetEngine().allocate_memory(mutableLayout)}; + cldnn::memory::ptr shared_memory {p.get_engine().allocate_memory(mutableLayout)}; cldnn::primitive_id experimental_detectron_mutable_id_w = layer_type_name_ID(op) + "_md_write"; cldnn::mutable_data experimenta_detectron_mutable_prim(experimental_detectron_mutable_id_w, diff --git a/src/plugins/intel_gpu/src/plugin/ops/generate_proposals.cpp b/src/plugins/intel_gpu/src/plugin/ops/generate_proposals.cpp index 16129ef1d43..c0fcb2ff17d 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/generate_proposals.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/generate_proposals.cpp @@ -32,7 +32,7 @@ static void CreateGenerateProposalsIEInternalOp( const cldnn::layout mutable_layout_1{cldnn::element_type_to_data_type(mutable_precision_1), cldnn::format::get_default_format(output_shape_1.size()), tensor_from_dims(output_shape_1)}; - cldnn::memory::ptr shared_memory_1{p.GetEngine().allocate_memory(mutable_layout_1)}; + cldnn::memory::ptr shared_memory_1{p.get_engine().allocate_memory(mutable_layout_1)}; const auto mutable_id_w_1 = layer_type_name + "_md_write.1"; const cldnn::mutable_data mutable_prim_w_1{mutable_id_w_1, shared_memory_1}; @@ -45,7 +45,7 @@ static void CreateGenerateProposalsIEInternalOp( const cldnn::layout mutable_layout_2{cldnn::element_type_to_data_type(mutable_precision_2), cldnn::format::get_default_format(output_shape_2.size()), 
tensor_from_dims(output_shape_2)}; - cldnn::memory::ptr shared_memory_2{p.GetEngine().allocate_memory(mutable_layout_2)}; + cldnn::memory::ptr shared_memory_2{p.get_engine().allocate_memory(mutable_layout_2)}; const auto mutable_id_w_2 = layer_type_name + "_md_write.2"; const cldnn::mutable_data mutable_prim_w_2{mutable_id_w_2, shared_memory_2}; diff --git a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp index 8ac909f2d82..f8b3091cb08 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp @@ -29,8 +29,8 @@ namespace intel_gpu { template static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num) { - auto mem = p.GetEngine().allocate_memory({ cldnn::data_types::i64, cldnn::format::bfyx, { 1, 1, 1, 1 } }); - cldnn::mem_lock ptr{mem, p.GetEngine().get_program_stream()}; + auto mem = p.get_engine().allocate_memory({ cldnn::data_types::i64, cldnn::format::bfyx, { 1, 1, 1, 1 } }); + cldnn::mem_lock ptr{mem, p.get_engine().get_service_stream()}; *ptr.begin() = num; return {id, mem}; } @@ -42,7 +42,7 @@ static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::sha const auto format = cldnn::format::get_default_format(op->get_output_shape(output_idx).size()); const auto tensor = tensor_from_dims(op->get_output_shape(output_idx)); cldnn::layout output_layout = cldnn::layout(precision, format, tensor); - auto mem = p.GetEngine().allocate_memory(output_layout); + auto mem = p.get_engine().allocate_memory(output_layout); auto md = cldnn::mutable_data(id, {cldnn::input_info(input)}, mem); // cldnn::data cannot set dependency return md; } @@ -82,7 +82,7 @@ static void CreateLoopOp(Program& p, const std::shared_ptr& op) { } // get body topology from ngraph function - Program body_program(body_network, p.GetEnginePtr(), p.GetConfig(), true); + Program body_program(body_network, p.get_engine(), p.get_config(), true); auto body_topology = *body_program.GetTopology(); // setup input_primitive_maps/ output_primitive_maps and back_edges diff --git a/src/plugins/intel_gpu/src/plugin/ops/matrix_nms.cpp b/src/plugins/intel_gpu/src/plugin/ops/matrix_nms.cpp index 3f3438b1100..d3465b57958 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/matrix_nms.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/matrix_nms.cpp @@ -34,7 +34,7 @@ void CreateNmsStaticShapeIE8Op(Program& p, const std::shared_ptr(outputIndices), 1, 1, 1)); - shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayoutFirst)); + shared_memory.emplace_back(p.get_engine().allocate_memory(mutableLayoutFirst)); cldnn::primitive_id matrix_nms_mutable_id_w_first = layer_type_name_ID(op) + "_md_write_first"; auto matrix_nms_mutable_prim_first = cldnn::mutable_data(matrix_nms_mutable_id_w_first, shared_memory.back()); @@ -46,7 +46,7 @@ void CreateNmsStaticShapeIE8Op(Program& p, const std::shared_ptr(batches_num), 1, 1, 1)); - shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayoutSecond)); + shared_memory.emplace_back(p.get_engine().allocate_memory(mutableLayoutSecond)); cldnn::primitive_id matrix_nms_mutable_id_w_second = layer_type_name_ID(op) + "_md_write_second"; auto matrix_nms_mutable_prim_second = cldnn::mutable_data(matrix_nms_mutable_id_w_second, shared_memory.back()); diff --git a/src/plugins/intel_gpu/src/plugin/ops/multiclass_nms.cpp b/src/plugins/intel_gpu/src/plugin/ops/multiclass_nms.cpp index 46339686d11..7b7a09f128a 100644 --- 
a/src/plugins/intel_gpu/src/plugin/ops/multiclass_nms.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/multiclass_nms.cpp @@ -32,7 +32,7 @@ static void CreateMulticlassNmsIEInternalOp(Program& p, const std::shared_ptrget_output_shape(2))); GPU_DEBUG_LOG << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl; - shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayoutSecond)); + shared_memory.emplace_back(p.get_engine().allocate_memory(mutableLayoutSecond)); cldnn::primitive_id non_max_supression_mutable_id_w_second = layer_type_name_ID(op) + "_md_write_second"; auto nms_mutable_prim_second = cldnn::mutable_data(non_max_supression_mutable_id_w_second, @@ -121,7 +121,7 @@ static void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_pt cldnn::tensor(static_cast(outputIndices), 3, 1, 1)); GPU_DEBUG_LOG << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl; - shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayoutFirst)); + shared_memory.emplace_back(p.get_engine().allocate_memory(mutableLayoutFirst)); cldnn::primitive_id non_max_supression_mutable_id_w_first = layer_type_name_ID(op) + "_md_write_first"; auto nms_mutable_prim_first = cldnn::mutable_data(non_max_supression_mutable_id_w_first, diff --git a/src/plugins/intel_gpu/src/plugin/ops/normalize_l2.cpp b/src/plugins/intel_gpu/src/plugin/ops/normalize_l2.cpp index aedbf3e02f6..4c828c5b84f 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/normalize_l2.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/normalize_l2.cpp @@ -36,8 +36,8 @@ static void CreateNormalizeL2Op(Program& p, const std::shared_ptr(op->get_output_element_type(0), ngraph::Shape{1}, std::vector{1.0}); cldnn::layout constLayout = cldnn::layout(cldnn::element_type_to_data_type(op->get_output_element_type(0)), cldnn::format::bfyx, cldnn::tensor{1}); - auto mem = p.GetEngine().allocate_memory(constLayout, false); - cldnn::mem_lock tmpPointer{mem, p.GetEngine().get_program_stream()}; + auto mem = p.get_engine().allocate_memory(constLayout, false); + cldnn::mem_lock tmpPointer{mem, p.get_engine().get_service_stream()}; auto buf = tmpPointer.data(); auto bufSize = scale->get_output_tensor(0).size(); diff --git a/src/plugins/intel_gpu/src/plugin/ops/parameter.cpp b/src/plugins/intel_gpu/src/plugin/ops/parameter.cpp index 75f2cb56297..d25748a5e1e 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/parameter.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/parameter.cpp @@ -108,8 +108,8 @@ static void CreateParameterOp(Program& p, const std::shared_ptrsecond; } else { - auto mem = p.GetEngine().allocate_memory(meanBlobLayout, false); - cldnn::mem_lock tmpPointer{ mem, p.GetEngine().get_program_stream() }; + auto mem = p.get_engine().allocate_memory(meanBlobLayout, false); + cldnn::mem_lock tmpPointer{ mem, p.get_engine().get_service_stream() }; auto buf = tmpPointer.data(); auto bufSize = meanBlobLayout.bytes_count(); @@ -197,7 +197,7 @@ static void CreateParameterOp(Program& p, const std::shared_ptrname(), networkInputLayout }); p.add_primitive(*op, cldnn::input_layout(inputName, networkInputLayout)); } else { - if (ColorFormat::NV12 == preProcess.getColorFormat() && p.GetConfig().nv12_two_inputs) { + if (ColorFormat::NV12 == preProcess.getColorFormat() && p.get_config().get_property(ov::intel_gpu::nv12_two_inputs)) { // for NV12, create two input layouts with reorder instead of one, // and then would expect compound blob in inferRequest if (InferenceEngine::Layout::NCHW != l && diff --git 
a/src/plugins/intel_gpu/src/plugin/ops/pooling.cpp b/src/plugins/intel_gpu/src/plugin/ops/pooling.cpp index 6382c4847bd..32112d4ff9d 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/pooling.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/pooling.cpp @@ -90,7 +90,7 @@ static void CreateMaxPoolOp(Program& p, const std::shared_ptrget_output_shape(1))); GPU_DEBUG_LOG << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl; - auto shared_memory = p.GetEngine().allocate_memory(mutableLayout); + auto shared_memory = p.get_engine().allocate_memory(mutableLayout); cldnn::primitive_id proposal_mutable_id_w = layer_type_name_ID(op) + "_md_write"; auto argmax_mutable_prim = cldnn::mutable_data(proposal_mutable_id_w, diff --git a/src/plugins/intel_gpu/src/plugin/ops/tensor_iterator.cpp b/src/plugins/intel_gpu/src/plugin/ops/tensor_iterator.cpp index 2a8f7c5bdb8..bad0e2d7bac 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/tensor_iterator.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/tensor_iterator.cpp @@ -28,8 +28,8 @@ namespace intel_gpu { template static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num) { - auto mem = p.GetEngine().allocate_memory({ cldnn::data_types::i64, cldnn::format::bfyx, { 1, 1, 1, 1 } }); - cldnn::mem_lock ptr{mem, p.GetEngine().get_program_stream()}; + auto mem = p.get_engine().allocate_memory({ cldnn::data_types::i64, cldnn::format::bfyx, { 1, 1, 1, 1 } }); + cldnn::mem_lock ptr{mem, p.get_engine().get_service_stream()}; *ptr.begin() = num; return {id, mem}; } @@ -41,7 +41,7 @@ static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::sha const auto format = cldnn::format::get_default_format(op->get_output_shape(output_idx).size()); const auto tensor = tensor_from_dims(op->get_output_shape(output_idx)); cldnn::layout output_layout = cldnn::layout(precision, format, tensor); - auto mem = p.GetEngine().allocate_memory(output_layout); + auto mem = p.get_engine().allocate_memory(output_layout); auto md = cldnn::mutable_data(id, {cldnn::input_info(input)}, mem); // cldnn::data cannot set dependency return md; } @@ -51,7 +51,7 @@ static void CreateTensorIteratorOp(Program &p, const std::shared_ptrget_body()); - Program body_program(body_network, p.GetEnginePtr(), p.GetConfig(), true); + Program body_program(body_network, p.get_engine(), p.get_config(), true); auto body_topology = *body_program.GetTopology(); // setup input_primitive_maps/ output_primitive_maps and back_edges diff --git a/src/plugins/intel_gpu/src/plugin/ops/topk.cpp b/src/plugins/intel_gpu/src/plugin/ops/topk.cpp index f7132c83007..4b98db3dd97 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/topk.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/topk.cpp @@ -66,7 +66,7 @@ static void CreateTopKOp(Program& p, const std::shared_ptr tensor_from_dims(op->get_output_shape(1))); GPU_DEBUG_LOG << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl; - auto shared_memory = p.GetEngine().allocate_memory(mutableLayout); + auto shared_memory = p.get_engine().allocate_memory(mutableLayout); cldnn::primitive_id argmax_mutable_id_w = layer_type_name_ID(op) + "_md_write"; auto argmax_mutable_prim = cldnn::mutable_data(argmax_mutable_id_w, diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 6b35f0afc5c..6d6b07f75fc 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -12,7 +12,6 @@ #include #include #include "ie_metric_helpers.hpp" -#include 
"ie_plugin_config.hpp" #include #include @@ -20,9 +19,13 @@ #include "intel_gpu/plugin/plugin.hpp" #include "intel_gpu/plugin/compiled_model.hpp" #include "intel_gpu/plugin/transformations_pipeline.hpp" -#include "intel_gpu/plugin/custom_layer.hpp" -#include "intel_gpu/plugin/internal_properties.hpp" #include "intel_gpu/runtime/itt.hpp" +#include "intel_gpu/plugin/legacy_api_helper.hpp" +#include "intel_gpu/runtime/execution_config.hpp" +#include "intel_gpu/runtime/device_query.hpp" +#include "intel_gpu/runtime/debug_configuration.hpp" + +#include "ie_plugin_config.hpp" #include "gpu/gpu_config.hpp" #include "cpp_interfaces/interface/ie_internal_plugin_config.hpp" #include "ie_icore.hpp" @@ -31,19 +34,12 @@ #include "transformations/init_node_info.hpp" #include "transformations/common_optimizations/dimension_tracking.hpp" #include - #include -#include "openvino/pass/serialize.hpp" -#include "openvino/pass/manager.hpp" -#include + +#include #include -#include "intel_gpu/runtime/device_query.hpp" -#include "intel_gpu/runtime/debug_configuration.hpp" #include -#ifdef __linux__ -# include -#endif // Undef DEVICE_TYPE macro which can be defined somewhere in windows headers as DWORD and conflict with our metric #ifdef DEVICE_TYPE @@ -67,17 +63,29 @@ namespace intel_gpu { #include "intel_gpu/plugin/primitives_list.hpp" #undef REGISTER_FACTORY -void Plugin::RegisterPrimitives() { +void Plugin::register_primitives() { #define REGISTER_FACTORY(op_version, op_name) FACTORY_CALL(op_version, op_name) #include "intel_gpu/plugin/primitives_list.hpp" #undef REGISTER_FACTORY } -struct Plugin::impl { - Configs m_configs; -}; +ov::AnyMap Plugin::preprocess_config(const std::map& orig_config) const { + // We can skip this conversion for new API once all meta plugins don't try to use legacy configs/metrics for new API internally + auto config = LegacyAPIHelper::convert_legacy_properties(orig_config, IsNewAPI()); -std::string Plugin::GetDeviceIDFromConfig(const std::map& config) const { + // Code below is WA for issue 100498 + auto hint_it = std::find_if(orig_config.begin(), orig_config.end(), [](const std::pair& kv) { + return kv.first == ov::hint::performance_mode.name(); + }); + + if (hint_it != orig_config.end()) { + config[ov::hint::performance_mode.name()] = ov::util::from_string(hint_it->second, ov::hint::performance_mode); + } + + return config; +} + +std::string Plugin::get_device_id_from_config(const std::map& config) const { std::string device_id; if (config.find(PluginConfigParams::KEY_DEVICE_ID) != config.end()) { device_id = config.at(PluginConfigParams::KEY_DEVICE_ID); @@ -85,35 +93,30 @@ std::string Plugin::GetDeviceIDFromConfig(const std::map &config) const { - auto device_info = device_map.begin()->second->get_info(); - std::string device_id = GetDeviceIDFromConfig(config); - if (!device_id.empty()) { - if (device_map.find(device_id) == device_map.end()) { - IE_THROW() << "Invalid device ID: " << device_id; - } - device_info = device_map.at(device_id)->get_info(); +std::string Plugin::get_device_id(const std::map& config) const { + std::string device_id = default_device_id; + if (config.find(PluginConfigParams::KEY_DEVICE_ID) != config.end()) { + device_id = config.at(PluginConfigParams::KEY_DEVICE_ID); } - - return device_info; + return device_id; } -void Plugin::TransformNetwork(std::shared_ptr& model, const Config& config) const { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::TransformNetwork"); - auto deviceInfo = GetDeviceInfo(config.key_config_map); +void 
Plugin::transform_model(std::shared_ptr& model, const ExecutionConfig& config) const { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::transform_model"); + auto deviceInfo = device_map.at(config.get_property(ov::device::id))->get_info(); TransformationsPipeline transformations(config, deviceInfo); transformations.apply(model); } -InferenceEngine::CNNNetwork Plugin::CloneAndTransformNetwork(const InferenceEngine::CNNNetwork& network, - const Config& config) const { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::CloneAndTransformNetwork"); - GPU_DEBUG_DEFINE_MEM_LOGGER("Plugin::CloneAndTransformNetwork"); +InferenceEngine::CNNNetwork Plugin::clone_and_transform_model(const InferenceEngine::CNNNetwork& network, + const ExecutionConfig& config) const { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::clone_and_transform_model"); + GPU_DEBUG_DEFINE_MEM_LOGGER("Plugin::clone_and_transform_model"); CNNNetwork clonedNetwork = InferenceEngine::details::cloneNetwork(network); auto nGraphFunc = clonedNetwork.getFunction(); if (nGraphFunc) { - TransformNetwork(nGraphFunc, config); + transform_model(nGraphFunc, config); GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { auto path_base = debug_config->dump_graphs + "/" + network.getName() + "_" + "transformed_func"; @@ -123,10 +126,9 @@ InferenceEngine::CNNNetwork Plugin::CloneAndTransformNetwork(const InferenceEngi return clonedNetwork; } -Plugin::Plugin() : m_defaultContexts({}) { +Plugin::Plugin() : m_default_contexts({}) { _pluginName = "GPU"; - _impl = std::make_shared(); - RegisterPrimitives(); + register_primitives(); // try loading gpu engine and get info from it { // Set OCL runtime which should be always available @@ -135,35 +137,11 @@ Plugin::Plugin() : m_defaultContexts({}) { // Set default configs for each device for (auto& device : device_map) { - _impl->m_configs.CreateConfig(device.first); + m_configs_map.insert({device.first, ExecutionConfig(ov::device::id(device.first))}); + auto ctx = std::make_shared(GetName() + "." 
+ device.first, std::vector{ device.second }); + m_default_contexts.insert({device.first, ctx}); } } - // locate global custom kernel config - // and auto-load kernels from it -#ifdef _WIN32 - CHAR mpath[MAX_PATH + 1]; - HMODULE nModule; - GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, - (LPCSTR)CustomLayer::LoadFromFile, - &nModule); - GetModuleFileName(nModule, mpath, sizeof(mpath)); -#elif __linux__ - Dl_info dl_info; - dladdr(reinterpret_cast(CustomLayer::LoadFromFile), &dl_info); - const char* mpath = dl_info.dli_fname; -#endif - std::string configFile(mpath); - std::size_t dir_split_pos = configFile.find_last_of("/\\"); - std::string config_path; - - if (dir_split_pos != std::string::npos) { - // path contains directory - config_path = configFile.substr(0, dir_split_pos); - } - config_path += "/cldnn_global_custom_kernels/cldnn_global_custom_kernels.xml"; - for (auto& config : _impl->m_configs) { - CustomLayer::LoadFromFile(config_path, config.second.customLayers, true); - } if (const char* env_p = std::getenv("OV_GPU_CACHE_MODEL")) { if (env_p[0] == '1') { @@ -193,67 +171,18 @@ auto check_inputs = [](InferenceEngine::InputsDataMap _networkInputs) { } }; -void Plugin::UpdateConfig(Config& conf, const InferenceEngine::CNNNetwork &network, const std::map ¶ms) const { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::UpdateConfig"); - auto device_info = GetDeviceInfo(params); - conf.enableInt8 = device_info.supports_imad || device_info.supports_immad; - conf.UpdateFromMap(params, device_info); - if (conf.enableDynamicBatch) { - conf.max_dynamic_batch = static_cast(network.getBatchSize()); - } -} - -void Plugin::UpdateStatistics(const RemoteCLContext::Ptr& context) const { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::UpdateStatistics"); +void Plugin::update_memory_statistics(const RemoteContextImpl::Ptr& context) const { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::update_memory_statistics"); { std::lock_guard lock(engine_mutex); - std::map statistics; - auto impl = getContextImpl(context); - std::lock_guard locker(*impl); - std::shared_ptr eng = impl->GetEngine(); - statistics = eng->get_memory_statistics(); - // if the same context exists, the statistics is replaced with the latest one // (currently, memory usage is accumulated for several networks in the same context) // if it does not exist, a new statistics is added - statistics_map[context] = statistics; + statistics_map[context] = context->get_engine().get_memory_statistics(); } } -std::map Plugin::ConvertPerfHintsToConfig( - const std::map& network_config, - const Config& plugin_config) const { - // deduces the actual settings from the performance hints and returns fully-defined config - auto config = network_config; - const auto &mode = config.find(PluginConfigParams::KEY_PERFORMANCE_HINT); - // the mode may have just arrived to the LoadNetwork, or was set with the plugins' SetConfig - if (mode != config.end() || !plugin_config.perfHintsConfig.ovPerfHint.empty()) { - const auto mode_name = (mode != config.end()) - ? 
PerfHintsConfig::CheckPerformanceHintValue(mode->second) - : plugin_config.perfHintsConfig.ovPerfHint; - //checking streams (to avoid overriding what user might explicitly set in the incoming config or previously via SetConfig) - const auto streams = config.find(PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS) == config.end() && - config.find(ov::num_streams.name()) == config.end(); - if (streams && !streamsSet) { - if (mode_name == CONFIG_VALUE(LATENCY)) { - config[PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS] = std::to_string(1); - config[ov::num_streams.name()] = std::to_string(1); - } else if (mode_name == CONFIG_VALUE(THROUGHPUT)) { - config[PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS] = CONFIG_VALUE(GPU_THROUGHPUT_AUTO); - config[ov::num_streams.name()] = ov::util::to_string(ov::streams::AUTO); - //disabling the throttling temporarily to set the validation (that is switching to the hints) perf baseline - //checking throttling (to avoid overriding what user might explicitly set in the incoming config or previously via SetConfig) - // const auto bInConfig = config.find(GPUConfigParams::KEY_GPU_PLUGIN_THROTTLE) != config.end() || - // config.find(CLDNNConfigParams::KEY_CLDNN_PLUGIN_THROTTLE) != config.end(); - // if (!bInConfig && !throttlingSet) - // config[GPUConfigParams::KEY_GPU_PLUGIN_THROTTLE] = std::to_string(1); - } - } - } - return config; -} - IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std::map &orig_config) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::LoadExeNetworkImpl"); @@ -261,36 +190,21 @@ IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo(); check_inputs(_networkInputs); - Configs confs = _impl->m_configs; - std::string device_id = GetDeviceIDFromConfig(orig_config); - Config conf = confs.GetConfig(device_id); + std::string device_id = get_device_id(orig_config); - auto config = ConvertPerfHintsToConfig(orig_config, conf); - UpdateConfig(conf, network, config); + auto context = get_default_context(device_id); - RemoteCLContext::Ptr context; + OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] LoadExeNetworkImpl: Couldn't find config for GPU with id ", device_id); - auto canReuseDefaultContext = [&]() -> bool { - if (m_defaultContexts.find(conf.device_id) == m_defaultContexts.end()) - return false; + ExecutionConfig config = m_configs_map.at(device_id); + config.set_user_property(preprocess_config(orig_config)); + config.apply_user_properties(context->get_impl()->get_engine().get_device_info()); - return m_defaultContexts.at(conf.device_id)->GetConfig().CanShareContextWith(conf); - }; - - { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::LoadExeNetworkImpl::CreateContext"); - std::lock_guard lock(engine_mutex); - if (!canReuseDefaultContext()) - m_defaultContexts[conf.device_id] = std::make_shared(shared_from_this(), AnyMap(), conf); - } - - context = m_defaultContexts[conf.device_id]; - - auto transformedNetwork = CloneAndTransformNetwork(network, conf); + auto transformedNetwork = clone_and_transform_model(network, config); { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::LoadExeNetworkImpl::CreateExeNetwork"); - CompiledModel::Ptr exeNetwork = std::make_shared(transformedNetwork, context, conf); - UpdateStatistics(context); + CompiledModel::Ptr exeNetwork = std::make_shared(transformedNetwork, context, config); + 
update_memory_statistics(context->get_impl()); return exeNetwork; } } @@ -301,108 +215,98 @@ IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo(); check_inputs(_networkInputs); - auto casted = std::dynamic_pointer_cast(context); - if (nullptr == casted) { - IE_THROW() << "Invalid context"; - } + auto context_impl = get_context_impl(context); + auto device_id = InferenceEngine::DeviceIDParser{context_impl->get_device_name()}.getDeviceID(); - Config conf = getContextImpl(casted)->GetConfig(); - auto config = ConvertPerfHintsToConfig(orig_config, conf); - UpdateConfig(conf, network, config); + OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] LoadExeNetworkImpl: Couldn't find config for GPU with id ", device_id); - auto transformedNetwork = CloneAndTransformNetwork(network, conf); - return std::make_shared(transformedNetwork, casted, conf); + ExecutionConfig config = m_configs_map.at(device_id); + config.set_user_property(preprocess_config(orig_config)); + config.apply_user_properties(context_impl->get_engine().get_device_info()); + + auto transformedNetwork = clone_and_transform_model(network, config); + return std::make_shared(transformedNetwork, context, config); } InferenceEngine::RemoteContext::Ptr Plugin::CreateContext(const AnyMap& params) { - // parameter map is non-empty - std::string contextTypeStr = _StrFromParams(params, GPU_PARAM_KEY(CONTEXT_TYPE)); - - if (GPU_PARAM_VALUE(OCL) == contextTypeStr) { - return std::make_shared(shared_from_this(), params, _impl->m_configs.GetDefaultDeviceConfig()); - } else if (GPU_PARAM_VALUE(VA_SHARED) == contextTypeStr) { -#ifdef _WIN32 - return std::make_shared(shared_from_this(), params, _impl->m_configs.GetDefaultDeviceConfig()); -#else - return std::make_shared(shared_from_this(), params, _impl->m_configs.GetDefaultDeviceConfig()); -#endif - } else { - IE_THROW() << "Invalid remote context type" << contextTypeStr; + if (params.empty()) { + return get_default_context(default_device_id); } + + std::vector known_contexts; + for (auto& c : m_default_contexts) { + known_contexts.push_back(c.second->get_impl()); + } + std::string context_type = extract_object(params, GPU_PARAM_KEY(CONTEXT_TYPE)); + + if (GPU_PARAM_VALUE(OCL) == context_type) { + return std::make_shared(known_contexts, params); + } else if (GPU_PARAM_VALUE(VA_SHARED) == context_type) { +#ifdef _WIN32 + return std::make_shared(known_contexts, params); +#else + return std::make_shared(known_contexts, params); +#endif + } + + OPENVINO_ASSERT(false, "[GPU] Unsupported context type passed to CreateContext method: ", context_type); +} + +RemoteCLContext::Ptr Plugin::get_default_context(const std::string& device_id) const { + OPENVINO_ASSERT(m_default_contexts.find(device_id) != m_default_contexts.end(), "[GPU] Context was not initialized for ", device_id, " device"); + + return m_default_contexts.at(device_id);; } InferenceEngine::RemoteContext::Ptr Plugin::GetDefaultContext(const AnyMap& params) { - RemoteCLContext::Ptr ctx; - std::string device_id = ""; + std::string device_id = default_device_id; if (params.find(CONFIG_KEY(DEVICE_ID)) != params.end()) device_id = params.at(CONFIG_KEY(DEVICE_ID)).as(); - const Config conf = _impl->m_configs.GetConfig(device_id); - - if (m_defaultContexts.find(conf.device_id) != m_defaultContexts.end() && - m_defaultContexts.at(conf.device_id)->GetConfig().CanShareContextWith(conf)) { - ctx = m_defaultContexts.at(conf.device_id); 
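// Illustrative sketch (assumed usage, not taken from this change): with the default-context
// reuse above, asking the plugin for its default context and then compiling a model on the
// same device is expected to share a single underlying cl_context instead of creating a new
// one per compiled model:
//
//   InferenceEngine::Core core;
//   auto ctx = core.GetDefaultContext("GPU");           // plugin's default RemoteCLContext for the device
//   auto exec_net = core.LoadNetwork(network, "GPU");   // LoadExeNetworkImpl() reuses that same context
//   // exec_net.GetContext() and ctx should refer to the same shared context
//
// A second minimal sketch (assumption) of the per-device config flow used by the entry points
// above: user options are converted from legacy keys, stored as user properties, and resolved
// against the device only when apply_user_properties() is called:
//
//   ExecutionConfig config = m_configs_map.at(device_id);        // per-device defaults
//   config.set_user_property(preprocess_config(orig_config));    // legacy keys -> ov:: properties
//   config.apply_user_properties(engine.get_device_info());      // resolve hints for this device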
- } else { - ctx = std::make_shared(shared_from_this(), AnyMap(), conf); - } - - return ctx; + return get_default_context(device_id); } void Plugin::SetConfig(const std::map &config) { - streamsSet = config.find(PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS) != config.end() || - config.find(ov::num_streams.name()) != config.end(); - throttlingSet = config.find(GPUConfigParams::KEY_GPU_PLUGIN_THROTTLE) != config.end() || - config.find(ov::intel_gpu::hint::queue_throttle.name()) != config.end(); - std::string device_id; - cldnn::device_info device_info = device_map.begin()->second->get_info(); - if (config.find(PluginConfigInternalParams::KEY_CONFIG_DEVICE_ID) != config.end()) { - device_id = config.at(PluginConfigInternalParams::KEY_CONFIG_DEVICE_ID); - if (!device_id.empty() && device_map.find(device_id) != device_map.end()) { - device_info = device_map.at(device_id)->get_info(); + auto update_config = [this](ExecutionConfig& config, const std::map& user_config) { + config.set_user_property(preprocess_config(user_config)); + // Check that custom layers config can be loaded + if (user_config.find(ov::intel_gpu::config_file.name()) != user_config.end()) { + CustomLayerMap custom_layers; + auto custom_layers_config = user_config.at(ov::intel_gpu::config_file.name()); + CustomLayer::LoadFromFile(custom_layers_config, custom_layers, custom_layers_config.empty()); } - _impl->m_configs.GetConfig(device_id).UpdateFromMap(config, device_info); + }; + + if (config.find(PluginConfigInternalParams::KEY_CONFIG_DEVICE_ID) != config.end()) { + std::string device_id = config.at(PluginConfigInternalParams::KEY_CONFIG_DEVICE_ID); + update_config(m_configs_map.at(device_id), config); } else { - device_id = GetDeviceIDFromConfig(config); + std::string device_id = get_device_id_from_config(config); if (!device_id.empty()) { - if (device_map.find(device_id) != device_map.end()) { - device_info = device_map.at(device_id)->get_info(); - } - _impl->m_configs.SetDefaultDeviceID(device_id); - _impl->m_configs.GetConfig(device_id).UpdateFromMap(config, device_info); + default_device_id = device_id; + update_config(m_configs_map.at(device_id), config); } else { - for (auto& conf : _impl->m_configs) { - if (device_map.find(conf.first) != device_map.end()) { - device_info = device_map.at(conf.first)->get_info(); - } - conf.second.UpdateFromMap(config, device_info); + for (auto& conf : m_configs_map) { + update_config(conf.second, config); } } } } QueryNetworkResult Plugin::QueryNetwork(const CNNNetwork& network, - const std::map& config) const { + const std::map& orig_config) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::QueryNetwork"); QueryNetworkResult res; - Configs confs = _impl->m_configs; - std::string device_id = GetDeviceIDFromConfig(config); - Config conf = confs.GetConfig(device_id); + std::string device_id = get_device_id(orig_config); - UpdateConfig(conf, network, config); + auto ctx = get_default_context(device_id)->get_impl(); - RemoteCLContext::Ptr ctx; - if (m_defaultContexts.find(conf.device_id) != m_defaultContexts.end() && - m_defaultContexts.at(conf.device_id)->GetConfig().CanShareContextWith(conf)) { - ctx = m_defaultContexts.at(conf.device_id); - } else { - ctx = std::make_shared( - std::const_pointer_cast(shared_from_this()), - AnyMap(), conf); - m_defaultContexts[conf.device_id] = ctx; - } - Program prog(ctx->getImpl()->GetEngine(), conf); + ExecutionConfig config = m_configs_map.at(device_id); + config.set_user_property(preprocess_config(orig_config)); + 
config.apply_user_properties(ctx->get_engine().get_device_info()); + + Program prog(ctx->get_engine(), config); bool dyn_shape_batch_found = false; auto model = network.getFunction(); @@ -415,7 +319,7 @@ QueryNetworkResult Plugin::QueryNetwork(const CNNNetwork& network, std::map shapes; std::map> batch_dim; dyn_shape_batch_found = prog.IsDynBatchModel(model, shapes, batch_dim); - TransformNetwork(model, conf); + transform_model(model, config); }, [&](std::shared_ptr node) { if (node->is_dynamic()) { @@ -450,103 +354,53 @@ QueryNetworkResult Plugin::QueryNetwork(const CNNNetwork& network, }); for (auto&& layerName : supported) { - res.supportedLayersMap.emplace(layerName, GetName() + "." + conf.device_id); + res.supportedLayersMap.emplace(layerName, ctx->get_device_name()); } return res; } InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::ImportNetwork(std::istream& networkModel, - const std::map& orig_config) { + const std::map& orig_config) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::ImportNetwork"); - Configs confs = _impl->m_configs; - std::string device_id = GetDeviceIDFromConfig(orig_config); - Config conf = confs.GetConfig(device_id); + std::string device_id = get_device_id(orig_config); + auto context = get_default_context(device_id); - auto config = ConvertPerfHintsToConfig(orig_config, conf); - - RemoteCLContext::Ptr context; - - auto canReuseDefaultContext = [&]() -> bool { - if (m_defaultContexts.find(conf.device_id) == m_defaultContexts.end()) - return false; - - return m_defaultContexts.at(conf.device_id)->GetConfig().CanShareContextWith(conf); - }; - - { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::ImportNetwork::CreateContext"); - std::lock_guard lock(engine_mutex); - if (!canReuseDefaultContext()) { - context = std::make_shared(shared_from_this(), AnyMap(), conf); - m_defaultContexts[conf.device_id] = context; - } - } - - context = m_defaultContexts[conf.device_id]; + ExecutionConfig config = m_configs_map.at(device_id); + config.set_user_property(preprocess_config(orig_config)); + config.apply_user_properties(context->get_impl()->get_engine().get_device_info()); { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::ImportNetwork::CreateExeNetwork"); - CompiledModel::Ptr exeNetwork = std::make_shared(networkModel, context, conf); + CompiledModel::Ptr exeNetwork = std::make_shared(networkModel, context, config); exeNetwork->SetPointerToPlugin(shared_from_this()); - UpdateStatistics(context); + update_memory_statistics(context->get_impl()); return exeNetwork; } } Parameter Plugin::GetConfig(const std::string& name, const std::map& options) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::GetConfig"); - Parameter result; - std::string device_id; + std::string device_id = default_device_id; if (options.find(ov::device::id.name()) != options.end()) { device_id = options.find(ov::device::id.name())->second.as(); } - Config config = _impl->m_configs.GetConfig(device_id); - const bool is_new_api = IsNewAPI(); - if (config.key_config_map.find(name) != config.key_config_map.end()) { - std::string val = config.key_config_map.find(name)->second; - if (is_new_api) { - if (name == ov::enable_profiling) { - return val == PluginConfigParams::YES ? 
true : false; - } else if (name == ov::hint::model_priority) { - return ov::util::from_string(val, ov::hint::model_priority); - } else if (name == ov::intel_gpu::hint::host_task_priority) { - return ov::util::from_string(val, ov::intel_gpu::hint::host_task_priority); - } else if (name == ov::intel_gpu::hint::queue_priority) { - return ov::util::from_string(val, ov::intel_gpu::hint::queue_priority); - } else if (name == ov::intel_gpu::hint::queue_throttle) { - return ov::util::from_string(val, ov::intel_gpu::hint::queue_throttle); - } else if (name == ov::intel_gpu::enable_loop_unrolling) { - return val == PluginConfigParams::YES ? true : false; - } else if (name == ov::cache_dir) { - return ov::util::from_string(val, ov::cache_dir); - } else if (name == ov::hint::performance_mode) { - return ov::util::from_string(val, ov::hint::performance_mode); - } else if (name == ov::compilation_num_threads) { - return ov::util::from_string(val, ov::compilation_num_threads); - } else if (name == ov::num_streams) { - return ov::util::from_string(val, ov::num_streams); - } else if (name == ov::hint::num_requests) { - return ov::util::from_string(val, ov::hint::num_requests); - } else if (name == ov::hint::inference_precision) { - return ov::util::from_string(val, ov::hint::inference_precision); - } else if (name == ov::device::id) { - return ov::util::from_string(val, ov::device::id); - } else { - return val; - } - } else { - if (name == PluginConfigParams::KEY_MODEL_PRIORITY || - name == GPUConfigParams::KEY_GPU_HOST_TASK_PRIORITY) - return Config::ConvertPropertyToLegacy(name, val); - else - return val; - } - } else { - IE_THROW() << "3-Unsupported config key : " << name; + OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] GetConfig: Couldn't find config for GPU with id ", device_id); + + const auto& c = m_configs_map.at(device_id); + auto actual_name = name; + if (LegacyAPIHelper::is_legacy_property({name, nullptr}, IsNewAPI())) { + actual_name = LegacyAPIHelper::convert_legacy_property({name, nullptr}).first; } + + auto val = c.get_property(actual_name); + if (LegacyAPIHelper::is_legacy_property({name, nullptr}, IsNewAPI())) { + val = LegacyAPIHelper::convert_to_legacy_property({actual_name, val}).second; + } + + return val; } auto StringRightTrim = [](std::string string, std::string substring, bool case_sensitive = true) { @@ -581,61 +435,9 @@ Parameter Plugin::GetMetric(const std::string& name, const std::map metrics; - metrics.push_back(METRIC_KEY(AVAILABLE_DEVICES)); - metrics.push_back(METRIC_KEY(SUPPORTED_METRICS)); - metrics.push_back(METRIC_KEY(FULL_DEVICE_NAME)); - metrics.push_back(METRIC_KEY(OPTIMIZATION_CAPABILITIES)); - metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS)); - metrics.push_back(METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS)); - metrics.push_back(METRIC_KEY(RANGE_FOR_STREAMS)); - metrics.push_back(METRIC_KEY(DEVICE_TYPE)); - metrics.push_back(METRIC_KEY(DEVICE_GOPS)); - metrics.push_back(METRIC_KEY(OPTIMAL_BATCH_SIZE)); - metrics.push_back(METRIC_KEY(MAX_BATCH_SIZE)); - if (isModelCachingEnabled) - metrics.push_back(METRIC_KEY(IMPORT_EXPORT_SUPPORT)); - metrics.push_back(GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE)); - metrics.push_back(GPU_METRIC_KEY(UARCH_VERSION)); - metrics.push_back(GPU_METRIC_KEY(EXECUTION_UNITS_COUNT)); - metrics.push_back(GPU_METRIC_KEY(MEMORY_STATISTICS)); - IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics); + IE_SET_METRIC_RETURN(SUPPORTED_METRICS, LegacyAPIHelper::get_supported_metrics(isModelCachingEnabled)); } else if (name == 
METRIC_KEY(AVAILABLE_DEVICES)) { std::vector availableDevices = { }; for (auto const& dev : device_map) @@ -681,67 +483,7 @@ Parameter Plugin::GetMetric(const std::string& name, const std::map(1)}; - } - std::shared_ptr model; - try { - model = model_param->second.as>(); - } catch (...) { - IE_THROW() << "[OPTIMAL_BATCH_SIZE] ov::hint::model should be std::shared_ptr type"; - } - GPU_DEBUG_INFO << "DEVICE_INFO:" - << "gfx_version.major, " << device_info.gfx_ver.major - << "gfx_version.minor " << std::to_string(device_info.gfx_ver.minor) << std::endl; - static std::map gen_kbytes_per_bank = { - {{12, 0, 0}, 480}, // TGL - {{12, 1, 0}, 2048}, // DG1 - {{12, 5, 0}, 320}, - {{12, 7, 0}, 512}, - }; - size_t L3_cache_size = device_info.gfx_ver.major && (device_info.gfx_ver.major <= 9) - ? 768 * 1024 // Gen9 - : 2 * 768 * 1024; //reasonable default when no arch has been detected (e.g. due to old driver ver) - cldnn::gfx_version gen = {device_info.gfx_ver.major, device_info.gfx_ver.minor, 0 /*ignore the revision*/}; - auto val = gen_kbytes_per_bank.find(gen); - if (gen_kbytes_per_bank.end() != val) { - auto kbytes_per_bank = val->second; - auto num_banks_per_slice = device_info.num_sub_slices_per_slice > 4 - ? next_pow_of_2(device_info.num_sub_slices_per_slice) - : 2 * device_info.num_sub_slices_per_slice; - L3_cache_size = kbytes_per_bank * 1024 * num_banks_per_slice * device_info.num_slices; - GPU_DEBUG_INFO << "DEVICE_INFO:" - << "num_slices " << device_info.num_slices - << ", num_sub_slices_per_slice " << device_info.num_sub_slices_per_slice - << ", num_banks_per_slice " << num_banks_per_slice - << ", gen_kbytes_per_bank : " << kbytes_per_bank - << ", L3_cache_size is (MB): " << float(L3_cache_size) / 1024 / 1024 << std::endl; - } - Config config = _impl->m_configs.GetConfig(device_id); - auto networkCloned = CloneAndTransformNetwork(CNNNetwork(model), config); - ov::MemBandwidthPressure memPressure = ov::MemBandwidthPressureTolerance(networkCloned.getFunction(), L3_cache_size); - unsigned int batch = 1; - if (memPressure.max_mem_tolerance != ov::MemBandwidthPressure::UNKNOWN) - batch = std::max(1.0, 16 * closest_pow_of_2(memPressure.max_mem_tolerance)); - std::map options_for_max_batch; - options_for_max_batch[ov::hint::model.name()] = model; - options_for_max_batch["GPU_THROUGHPUT_STREAMS"] = CONFIG_VALUE(GPU_THROUGHPUT_AUTO); - auto max_batch_size = GetMetric(ov::max_batch_size.name(), options_for_max_batch).as(); - unsigned int closest = closest_pow_of_2(max_batch_size); - batch = std::min(closest, batch); - batch = std::min(256u, batch); //batch 256 is a max - GPU_DEBUG_INFO << memPressure.max_mem_tolerance << std::endl; - GPU_DEBUG_INFO << "MAX_BATCH: " << max_batch_size << std::endl; - GPU_DEBUG_INFO << "ACTUAL OPTIMAL BATCH: " << batch << std::endl; - return decltype(ov::optimal_batch_size)::value_type {batch}; + return decltype(ov::optimal_batch_size)::value_type {get_optimal_batch_size(options)}; } else if (name == ov::device::uuid) { ov::device::UUID uuid = {}; std::copy_n(std::begin(device_info.uuid.val), cldnn::device_uuid::max_uuid_size, std::begin(uuid.uuid)); @@ -751,29 +493,9 @@ Parameter Plugin::GetMetric(const std::string& name, const std::map configKeys; - for (auto opt : _impl->m_configs.GetConfig(device_id).key_config_map) { - // Exclude new API properties - if (!Config::isNewApiProperty(opt.first)) - configKeys.push_back(opt.first); - } - IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys); + IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, 
LegacyAPIHelper::get_supported_configs()); } else if (name == ov::device::capabilities) { - std::vector capabilities; - - capabilities.push_back(ov::device::capability::FP32); - capabilities.push_back(ov::device::capability::BIN); - if (!is_new_api) - capabilities.push_back(METRIC_VALUE(BATCHED_BLOB)); - if (device_info.supports_fp16) - capabilities.push_back(ov::device::capability::FP16); - if (device_info.supports_imad || device_info.supports_immad) - capabilities.push_back(ov::device::capability::INT8); - if (device_info.supports_immad) - capabilities.push_back(ov::intel_gpu::capability::HW_MATMUL); - if (isModelCachingEnabled) - capabilities.push_back(ov::device::capability::EXPORT_IMPORT); - return decltype(ov::device::capabilities)::value_type {capabilities}; + return decltype(ov::device::capabilities)::value_type {get_device_capabilities(device_info)}; } else if (name == ov::range_for_async_infer_requests) { std::tuple range = std::make_tuple(1, 2, 1); IE_SET_METRIC_RETURN(RANGE_FOR_ASYNC_INFER_REQUESTS, range); @@ -785,7 +507,7 @@ Parameter Plugin::GetMetric(const std::string& name, const std::map statistics; for (auto const &item : statistics_map) { // Before collecting memory statistics of each context, it's updated with the latest memory statistics from engine. - UpdateStatistics(item.first); + update_memory_statistics(item.first); for (auto const &kv : item.second) { if (!statistics.count(kv.first)) { statistics[kv.first] = kv.second; @@ -797,154 +519,7 @@ Parameter Plugin::GetMetric(const std::string& name, const std::mapm_configs.GetConfig(device_id); - uint32_t n_streams = static_cast(config.throughput_streams); - uint64_t occupied_device_mem = 0; - auto statistic_result = GetMetric(ov::intel_gpu::memory_statistics.name(), options).as>(); - auto occupied_usm_dev = statistic_result.find("usm_device_current"); - if (occupied_usm_dev != statistic_result.end()) { - occupied_device_mem = occupied_usm_dev->second; - } - - int64_t available_device_mem = device_info.max_global_mem_size - occupied_device_mem; - GPU_DEBUG_LOG << "[GPU_MAX_BATCH_SIZE] available memory is " << available_device_mem - << " (occupied: " << occupied_device_mem << ")" << std::endl; - - int64_t max_batch_size = 1; - - if (options.find(ov::hint::model.name()) == options.end()) { - GPU_DEBUG_INFO << "[GPU_MAX_BATCH_SIZE] MODELS_PTR is not set: return 1" << std::endl; - return decltype(ov::max_batch_size)::value_type {static_cast(max_batch_size)}; - } - - auto it_streams = options.find("GPU_THROUGHPUT_STREAMS") != options.end() ? options.find("GPU_THROUGHPUT_STREAMS") : - options.find(ov::num_streams.name()) != options.end() ? 
options.find(ov::num_streams.name()) : - options.end(); - if (it_streams != options.end()) { - if (it_streams->second.is()) { - n_streams = it_streams->second.as(); - } else if (it_streams->second.is()) { - n_streams = it_streams->second.as(); - } else if (it_streams->second.is()) { - std::string n_streams_str = it_streams->second.as(); - if (n_streams_str != CONFIG_VALUE(GPU_THROUGHPUT_AUTO) && - n_streams_str != util::to_string(ov::streams::AUTO)) { - IE_THROW() << "[GPU_MAX_BATCH_SIZE] bad casting: GPU_THROUGHPUT_STREAMS should be either of uint32_t type or \"GPU_THROUGHPUT_AUTO\""; - } - n_streams = std::max(config.GetDefaultNStreamsForThroughputMode(), device_info.num_ccs); - } else { - IE_THROW() << "[GPU_MAX_BATCH_SIZE] bad casting: GPU_THROUGHPUT_STREAMS should be either of uint32_t type or \"GPU_THROUGHPUT_AUTO\""; - } - } - - GPU_DEBUG_INFO << "[GPU_MAX_BATCH_SIZE] n_streams : " << n_streams << std::endl; - - auto available_device_mem_it = options.find(ov::intel_gpu::hint::available_device_mem.name()); - if (available_device_mem_it != options.end()) { - if (available_device_mem_it->second.is()) { - available_device_mem = std::min(static_cast(available_device_mem), available_device_mem_it->second.as()); - GPU_DEBUG_LOG << "[GPU_MAX_BATCH_SIZE] available memory is reset by user " << available_device_mem << std::endl; - } else { - IE_THROW() << "[GPU_MAX_BATCH_SIZE] bad casting: ov::intel_gpu::hint::available_device_mem should be int64_t type"; - } - if (available_device_mem < 0) { - IE_THROW() << "[GPU_MAX_BATCH_SIZE] ov::intel_gpu::hint::available_device_mem value should be greater than 0 for max batch size calculation"; - } - } - - std::shared_ptr model; - auto model_param = options.find(ov::hint::model.name())->second; - if (model_param.is>()) { - model = model_param.as>(); - } else { - IE_THROW() << "[GPU_MAX_BATCH_SIZE] ov::hint::model should be std::shared_ptr type"; - } - - InferenceEngine::CNNNetwork network(model); - size_t base_batch_size = 16; // empirically decided for DG1 - auto engine_params = Plugin::GetParams(config, device, nullptr); - auto engine = cldnn::engine::create(engine_params.engine_type, engine_params.runtime_type, device, - cldnn::engine_configuration(false, engine_params.queue_type, std::string(), - config.queuePriority, config.queueThrottle, true, - engine_params.use_unified_shared_memory, std::string(), config.throughput_streams), - engine_params.task_executor); - - std::shared_ptr program; - - GPU_DEBUG_IF(debug_config->base_batch_for_memory_estimation > 0) { - size_t user_specified_base_batch_size = debug_config->base_batch_for_memory_estimation; - base_batch_size = (user_specified_base_batch_size != base_batch_size) ? 
user_specified_base_batch_size : base_batch_size; - } - - auto cloned_network = InferenceEngine::details::cloneNetwork(network); - auto inputs_info = cloned_network.getInputsInfo(); - ICNNNetwork::InputShapes new_shapes; - - try { - std::set> batched_inputs; - - auto function = InferenceEngine::details::cloneNetwork(cloned_network).getFunction(); - ov::pass::Manager m; - m.register_pass(); - m.register_pass(true, false); - m.run_passes(function); - const auto& params = function->get_parameters(); - for (size_t input_id = 0; input_id < params.size(); input_id++) { - const auto& input = params[input_id]; - const auto& shape = input->get_partial_shape(); - // currently no plugin support batched execution for dynamic networks - if (shape.is_dynamic()) { - GPU_DEBUG_LOG << "[MAX_BATCH_SIZE] does not support dynamic networks" << std::endl; - return decltype(ov::max_batch_size)::value_type {static_cast(max_batch_size)}; - } - - if (shape.size()) { - for (size_t s = 0; s < shape.size(); s++) { - if (ov::DimensionTracker::get_label(shape[s])) { - // batched dim for the input - auto batched_input_id = ngraph::op::util::get_ie_output_name(params[input_id]->output(0)); - GPU_DEBUG_LOG << "[MAX_BATCH_SIZE] detected batched input " << batched_input_id - << "[" << s << "]" << std::endl; - batched_inputs.insert(std::make_pair(batched_input_id, s)); - } - } - } - } - - if (!batched_inputs.size()) { - GPU_DEBUG_LOG << "[MAX_BATCH_SIZE] MAX_BATCH_SIZE supports only networks with inputs/outputs featuring batched dim." << std::endl; - return decltype(ov::max_batch_size)::value_type {static_cast(max_batch_size)}; - } - - try { - ICNNNetwork::InputShapes shapes = cloned_network.getInputShapes(); - for (const auto& input : batched_inputs) - shapes[input.first][input.second] = base_batch_size; - cloned_network.reshape(shapes); - } catch (...) 
{ - GPU_DEBUG_INFO << "[MAX_BATCH_SIZE] Error at reshape to " << base_batch_size << std::endl; - return decltype(ov::max_batch_size)::value_type {static_cast(max_batch_size)}; - } - - auto nGraphFunc = cloned_network.getFunction(); - TransformationsPipeline transformations(config, device_info); - transformations.apply(nGraphFunc); - program = std::make_shared(cloned_network, engine, config, false, true); - std::pair device_memory_usage = program->GetCompiledProgram(0)->get_estimated_device_mem_usage(); - if (device_memory_usage.first == static_cast(-1L) && device_memory_usage.second == static_cast(-1L)) { - return decltype(ov::max_batch_size)::value_type {static_cast(max_batch_size)}; - } - int64_t mem_for_general = std::max(static_cast(1L), - static_cast(static_cast(available_device_mem) - device_memory_usage.first)); - int64_t mem_per_batch = std::max(static_cast(1L), (device_memory_usage.second / static_cast(base_batch_size))); - max_batch_size = mem_for_general / (mem_per_batch * static_cast(n_streams)); - GPU_DEBUG_INFO << "[GPU_MAX_BATCH_SIZE] Base batch size: " << base_batch_size << std::endl; - GPU_DEBUG_INFO << "[GPU_MAX_BATCH_SIZE] Const mem usage: " << device_memory_usage.first << std::endl; - GPU_DEBUG_INFO << "[GPU_MAX_BATCH_SIZE] General mem usage: " << device_memory_usage.second << std::endl; - } catch (std::exception& e) { - GPU_DEBUG_INFO << "[GPU_MAX_BATCH_SIZE] Failed in reshape or build program " << e.what() << std::endl; - } - return decltype(ov::max_batch_size)::value_type {static_cast(max_batch_size)}; + return decltype(ov::max_batch_size)::value_type {static_cast(get_max_batch_size(options))}; } else if (isModelCachingEnabled && name == METRIC_KEY(IMPORT_EXPORT_SUPPORT)) { IE_SET_METRIC_RETURN(IMPORT_EXPORT_SUPPORT, true); } else if (name == ov::caching_properties) { @@ -975,6 +550,285 @@ Parameter Plugin::GetMetric(const std::string& name, const std::map Plugin::get_supported_properties() const { + static const std::vector supported_properties = { + // Metrics + ov::PropertyName{ov::supported_properties.name(), PropertyMutability::RO}, + ov::PropertyName{ov::available_devices.name(), PropertyMutability::RO}, + ov::PropertyName{ov::range_for_async_infer_requests.name(), PropertyMutability::RO}, + ov::PropertyName{ov::range_for_streams.name(), PropertyMutability::RO}, + ov::PropertyName{ov::optimal_batch_size.name(), PropertyMutability::RO}, + ov::PropertyName{ov::max_batch_size.name(), PropertyMutability::RO}, + ov::PropertyName{ov::caching_properties.name(), PropertyMutability::RO}, + ov::PropertyName{ov::device::architecture.name(), PropertyMutability::RO}, + ov::PropertyName{ov::device::full_name.name(), PropertyMutability::RO}, + ov::PropertyName{ov::device::uuid.name(), PropertyMutability::RO}, + ov::PropertyName{ov::device::type.name(), PropertyMutability::RO}, + ov::PropertyName{ov::device::gops.name(), PropertyMutability::RO}, + ov::PropertyName{ov::device::capabilities.name(), PropertyMutability::RO}, + ov::PropertyName{ov::intel_gpu::device_total_mem_size.name(), PropertyMutability::RO}, + ov::PropertyName{ov::intel_gpu::uarch_version.name(), PropertyMutability::RO}, + ov::PropertyName{ov::intel_gpu::execution_units_count.name(), PropertyMutability::RO}, + ov::PropertyName{ov::intel_gpu::memory_statistics.name(), PropertyMutability::RO}, + + // Configs + ov::PropertyName{ov::enable_profiling.name(), PropertyMutability::RW}, + ov::PropertyName{ov::hint::model_priority.name(), PropertyMutability::RW}, + 
ov::PropertyName{ov::intel_gpu::hint::host_task_priority.name(), PropertyMutability::RW}, + ov::PropertyName{ov::intel_gpu::hint::queue_priority.name(), PropertyMutability::RW}, + ov::PropertyName{ov::intel_gpu::hint::queue_throttle.name(), PropertyMutability::RW}, + ov::PropertyName{ov::intel_gpu::enable_loop_unrolling.name(), PropertyMutability::RW}, + ov::PropertyName{ov::cache_dir.name(), PropertyMutability::RW}, + ov::PropertyName{ov::hint::performance_mode.name(), PropertyMutability::RW}, + ov::PropertyName{ov::compilation_num_threads.name(), PropertyMutability::RW}, + ov::PropertyName{ov::num_streams.name(), PropertyMutability::RW}, + ov::PropertyName{ov::hint::num_requests.name(), PropertyMutability::RW}, + ov::PropertyName{ov::hint::inference_precision.name(), PropertyMutability::RW}, + ov::PropertyName{ov::device::id.name(), PropertyMutability::RW}, + }; + + return supported_properties; +} + +std::vector Plugin::get_device_capabilities(const cldnn::device_info& info) const { + std::vector capabilities; + + capabilities.push_back(ov::device::capability::FP32); + capabilities.push_back(ov::device::capability::BIN); + if (!IsNewAPI()) + capabilities.push_back(METRIC_VALUE(BATCHED_BLOB)); + if (info.supports_fp16) + capabilities.push_back(ov::device::capability::FP16); + if (info.supports_imad || info.supports_immad) + capabilities.push_back(ov::device::capability::INT8); + if (info.supports_immad) + capabilities.push_back(ov::intel_gpu::capability::HW_MATMUL); + if (isModelCachingEnabled) + capabilities.push_back(ov::device::capability::EXPORT_IMPORT); + + return capabilities; +} + +uint32_t Plugin::get_max_batch_size(const std::map& options) const { + GPU_DEBUG_GET_INSTANCE(debug_config); + std::string device_id = GetConfig(ov::device::id.name(), options); + auto context = m_default_contexts.at(device_id)->get_impl(); + const auto& device_info = context->get_engine().get_device_info(); + const auto& config = m_configs_map.at(device_id); + uint32_t n_streams = static_cast(config.get_property(ov::num_streams)); + uint64_t occupied_device_mem = 0; + auto statistic_result = GetMetric(ov::intel_gpu::memory_statistics.name(), options).as>(); + auto occupied_usm_dev = statistic_result.find("usm_device_current"); + if (occupied_usm_dev != statistic_result.end()) { + occupied_device_mem = occupied_usm_dev->second; + } + + int64_t available_device_mem = device_info.max_global_mem_size - occupied_device_mem; + GPU_DEBUG_LOG << "[GPU_MAX_BATCH_SIZE] available memory is " << available_device_mem + << " (occupied: " << occupied_device_mem << ")" << std::endl; + + int64_t max_batch_size = 1; + + if (options.find(ov::hint::model.name()) == options.end()) { + GPU_DEBUG_INFO << "[GPU_MAX_BATCH_SIZE] MODELS_PTR is not set: return 1" << std::endl; + return static_cast(max_batch_size); + } + + auto it_streams = options.find("GPU_THROUGHPUT_STREAMS") != options.end() ? options.find("GPU_THROUGHPUT_STREAMS") : + options.find(ov::num_streams.name()) != options.end() ? 
options.find(ov::num_streams.name()) : + options.end(); + if (it_streams != options.end()) { + if (it_streams->second.is()) { + n_streams = it_streams->second.as(); + } else if (it_streams->second.is()) { + n_streams = it_streams->second.as(); + } else if (it_streams->second.is()) { + std::string n_streams_str = it_streams->second.as(); + if (n_streams_str != CONFIG_VALUE(GPU_THROUGHPUT_AUTO) && + n_streams_str != util::to_string(ov::streams::AUTO)) { + IE_THROW() << "[GPU_MAX_BATCH_SIZE] bad casting: GPU_THROUGHPUT_STREAMS should be either of uint32_t type or \"GPU_THROUGHPUT_AUTO\""; + } + n_streams = std::max(/* config.GetDefaultNStreamsForThroughputMode() */2u, device_info.num_ccs); + } else { + IE_THROW() << "[GPU_MAX_BATCH_SIZE] bad casting: GPU_THROUGHPUT_STREAMS should be either of uint32_t type or \"GPU_THROUGHPUT_AUTO\""; + } + } + + GPU_DEBUG_INFO << "[GPU_MAX_BATCH_SIZE] n_streams : " << n_streams << std::endl; + + auto available_device_mem_it = options.find(ov::intel_gpu::hint::available_device_mem.name()); + if (available_device_mem_it != options.end()) { + if (available_device_mem_it->second.is()) { + available_device_mem = std::min(static_cast(available_device_mem), available_device_mem_it->second.as()); + GPU_DEBUG_LOG << "[GPU_MAX_BATCH_SIZE] available memory is reset by user " << available_device_mem << std::endl; + } else { + IE_THROW() << "[GPU_MAX_BATCH_SIZE] bad casting: ov::intel_gpu::hint::available_device_mem should be int64_t type"; + } + if (available_device_mem < 0) { + IE_THROW() << "[GPU_MAX_BATCH_SIZE] ov::intel_gpu::hint::available_device_mem value should be greater than 0 for max batch size calculation"; + } + } + + std::shared_ptr model; + auto model_param = options.find(ov::hint::model.name())->second; + if (model_param.is>()) { + model = model_param.as>(); + } else { + IE_THROW() << "[GPU_MAX_BATCH_SIZE] ov::hint::model should be std::shared_ptr type"; + } + + InferenceEngine::CNNNetwork network(model); + size_t base_batch_size = 16; // empirically decided for DG1 + + auto& engine = get_default_context(device_id)->get_impl()->get_engine(); + + std::shared_ptr program; + + GPU_DEBUG_IF(debug_config->base_batch_for_memory_estimation > 0) { + size_t user_specified_base_batch_size = debug_config->base_batch_for_memory_estimation; + base_batch_size = (user_specified_base_batch_size != base_batch_size) ? 
user_specified_base_batch_size : base_batch_size; + } + + auto cloned_network = InferenceEngine::details::cloneNetwork(network); + auto inputs_info = cloned_network.getInputsInfo(); + ICNNNetwork::InputShapes new_shapes; + + try { + std::set> batched_inputs; + + auto function = InferenceEngine::details::cloneNetwork(cloned_network).getFunction(); + ov::pass::Manager m; + m.register_pass(); + m.register_pass(true, false); + m.run_passes(function); + const auto& params = function->get_parameters(); + for (size_t input_id = 0; input_id < params.size(); input_id++) { + const auto& input = params[input_id]; + const auto& shape = input->get_partial_shape(); + // currently no plugin support batched execution for dynamic networks + if (shape.is_dynamic()) { + GPU_DEBUG_LOG << "[MAX_BATCH_SIZE] does not support dynamic networks" << std::endl; + return static_cast(max_batch_size); + } + + if (shape.size()) { + for (size_t s = 0; s < shape.size(); s++) { + if (ov::DimensionTracker::get_label(shape[s])) { + // batched dim for the input + auto batched_input_id = ngraph::op::util::get_ie_output_name(params[input_id]->output(0)); + GPU_DEBUG_LOG << "[MAX_BATCH_SIZE] detected batched input " << batched_input_id + << "[" << s << "]" << std::endl; + batched_inputs.insert(std::make_pair(batched_input_id, s)); + } + } + } + } + + if (!batched_inputs.size()) { + GPU_DEBUG_LOG << "[MAX_BATCH_SIZE] MAX_BATCH_SIZE supports only networks with inputs/outputs featuring batched dim." << std::endl; + return static_cast(max_batch_size); + } + + try { + ICNNNetwork::InputShapes shapes = cloned_network.getInputShapes(); + for (const auto& input : batched_inputs) + shapes[input.first][input.second] = base_batch_size; + cloned_network.reshape(shapes); + } catch (...) { + GPU_DEBUG_INFO << "[MAX_BATCH_SIZE] Error at reshape to " << base_batch_size << std::endl; + return static_cast(max_batch_size); + } + + auto nGraphFunc = cloned_network.getFunction(); + TransformationsPipeline transformations(config, device_info); + transformations.apply(nGraphFunc); + program = std::make_shared(cloned_network, engine, config, false, true); + std::pair device_memory_usage = program->GetCompiledProgram(0)->get_estimated_device_mem_usage(); + if (device_memory_usage.first == static_cast(-1L) && device_memory_usage.second == static_cast(-1L)) { + return static_cast(max_batch_size); + } + int64_t mem_for_general = std::max(1, available_device_mem - device_memory_usage.first); + int64_t mem_per_batch = std::max(1, device_memory_usage.second / static_cast(base_batch_size)); + max_batch_size = mem_for_general / (mem_per_batch * static_cast(n_streams)); + GPU_DEBUG_INFO << "[GPU_MAX_BATCH_SIZE] Base batch size: " << base_batch_size << std::endl; + GPU_DEBUG_INFO << "[GPU_MAX_BATCH_SIZE] Const mem usage: " << device_memory_usage.first << std::endl; + GPU_DEBUG_INFO << "[GPU_MAX_BATCH_SIZE] General mem usage: " << device_memory_usage.second << std::endl; + } catch (std::exception& e) { + GPU_DEBUG_INFO << "[GPU_MAX_BATCH_SIZE] Failed in reshape or build program " << e.what() << std::endl; + } + + return static_cast(max_batch_size); +} + +uint32_t Plugin::get_optimal_batch_size(const std::map& options) const { + std::string device_id = GetConfig(ov::device::id.name(), options); + auto context = m_default_contexts.at(device_id)->get_impl(); + const auto& device_info = context->get_engine().get_device_info(); + auto next_pow_of_2 = [] (float x) { + return pow(2, ceil(std::log(x)/std::log(2))); + }; + auto closest_pow_of_2 = [] (float x) { + return 
pow(2, floor(std::log(x)/std::log(2))); + }; + auto model_param = options.find(ov::hint::model.name()); + if (model_param == options.end()) { + GPU_DEBUG_INFO << "[OPTIMAL_BATCH_SIZE] ov::hint::model is not set: return 1" << std::endl; + return static_cast(1); + } + std::shared_ptr model; + try { + model = model_param->second.as>(); + } catch (...) { + IE_THROW() << "[OPTIMAL_BATCH_SIZE] ov::hint::model should be std::shared_ptr type"; + } + GPU_DEBUG_INFO << "DEVICE_INFO:" + << "gfx_version.major, " << device_info.gfx_ver.major + << "gfx_version.minor " << std::to_string(device_info.gfx_ver.minor) << std::endl; + static std::map gen_kbytes_per_bank = { + {{12, 0, 0}, 480}, // TGL + {{12, 1, 0}, 2048}, // DG1 + {{12, 5, 0}, 320}, + {{12, 7, 0}, 512}, + }; + size_t L3_cache_size = device_info.gfx_ver.major && (device_info.gfx_ver.major <= 9) + ? 768 * 1024 // Gen9 + : 2 * 768 * 1024; //reasonable default when no arch has been detected (e.g. due to old driver ver) + cldnn::gfx_version gen = {device_info.gfx_ver.major, device_info.gfx_ver.minor, 0 /*ignore the revision*/}; + auto val = gen_kbytes_per_bank.find(gen); + if (gen_kbytes_per_bank.end() != val) { + auto kbytes_per_bank = val->second; + auto num_banks_per_slice = device_info.num_sub_slices_per_slice > 4 + ? next_pow_of_2(device_info.num_sub_slices_per_slice) + : 2 * device_info.num_sub_slices_per_slice; + L3_cache_size = kbytes_per_bank * 1024 * num_banks_per_slice * device_info.num_slices; + GPU_DEBUG_INFO << "DEVICE_INFO:" + << "num_slices " << device_info.num_slices + << ", num_sub_slices_per_slice " << device_info.num_sub_slices_per_slice + << ", num_banks_per_slice " << num_banks_per_slice + << ", gen_kbytes_per_bank : " << kbytes_per_bank + << ", L3_cache_size is (MB): " << float(L3_cache_size) / 1024 / 1024 << std::endl; + } + auto config = m_configs_map.at(device_id); + auto networkCloned = clone_and_transform_model(CNNNetwork(model), config); + ov::MemBandwidthPressure memPressure = ov::MemBandwidthPressureTolerance(networkCloned.getFunction(), L3_cache_size); + uint32_t batch = 1; + if (memPressure.max_mem_tolerance != ov::MemBandwidthPressure::UNKNOWN) + batch = std::max(1.0, 16 * closest_pow_of_2(memPressure.max_mem_tolerance)); + std::map options_for_max_batch; + options_for_max_batch[ov::hint::model.name()] = model; + options_for_max_batch["GPU_THROUGHPUT_STREAMS"] = CONFIG_VALUE(GPU_THROUGHPUT_AUTO); + auto max_batch_size = GetMetric(ov::max_batch_size.name(), options_for_max_batch).as(); + uint32_t closest = closest_pow_of_2(max_batch_size); + batch = std::min(closest, batch); + batch = std::min(256u, batch); //batch 256 is a max + GPU_DEBUG_INFO << memPressure.max_mem_tolerance << std::endl; + GPU_DEBUG_INFO << "MAX_BATCH: " << max_batch_size << std::endl; + GPU_DEBUG_INFO << "ACTUAL OPTIMAL BATCH: " << batch << std::endl; + + return batch; +} + } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/program.cpp b/src/plugins/intel_gpu/src/plugin/program.cpp index 8963e00fb2f..7f5180ffe91 100644 --- a/src/plugins/intel_gpu/src/plugin/program.cpp +++ b/src/plugins/intel_gpu/src/plugin/program.cpp @@ -16,6 +16,10 @@ #include "intel_gpu/primitives/mutable_data.hpp" #include "intel_gpu/primitives/data.hpp" +#ifdef __linux__ +# include +#endif + using namespace InferenceEngine; using namespace InferenceEngine::details; @@ -121,7 +125,7 @@ bool Program::IsDynBatchModel(const std::shared_ptr& model, return dyn_shape_batch_found; } -Program::Program(InferenceEngine::CNNNetwork& network, 
std::shared_ptr engine, const Config& config, +Program::Program(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, const ExecutionConfig& config, bool createTopologyOnly, bool partialBuild) : m_curBatch(-1) , m_config(config) @@ -136,30 +140,60 @@ Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr(CustomLayer::LoadFromFile), &dl_info); + const char* mpath = dl_info.dli_fname; +#endif + std::string configFile(mpath); + std::size_t dir_split_pos = configFile.find_last_of("/\\"); + std::string config_path; + + if (dir_split_pos != std::string::npos) { + // path contains directory + config_path = configFile.substr(0, dir_split_pos); + } + config_path += "/cldnn_global_custom_kernels/cldnn_global_custom_kernels.xml"; + + CustomLayer::LoadFromFile(config_path, m_custom_layers, true); + auto custom_layers_config = m_config.get_property(ov::intel_gpu::config_file); + CustomLayer::LoadFromFile(custom_layers_config, m_custom_layers, custom_layers_config.empty()); + auto ops = func->get_ordered_ops(); bool dyn_shape_batch_found = false; std::map shapes; std::map> batch_dim; - if (m_config.enableDynamicBatch) { + auto enable_dynamic_batch = m_config.get_property(ov::intel_gpu::enable_dynamic_batch); + if (enable_dynamic_batch) { + m_config.set_property(ov::intel_gpu::max_dynamic_batch(network.getBatchSize())); // in case of legacy dynamic batch, // we assume 4D input with 0 batch dim auto param = func->get_parameters().front(); auto pname = getParamName(param); shapes[pname] = param->get_output_partial_shape(0); batch_dim[pname].first = 0; - batch_dim[pname].second = m_config.max_dynamic_batch; + batch_dim[pname].second = m_config.get_property(ov::intel_gpu::max_dynamic_batch); } else { dyn_shape_batch_found = IsDynBatchModel(func, shapes, batch_dim); if (dyn_shape_batch_found) { - m_config.max_dynamic_batch = batch_dim.begin()->second.second; + m_config.set_property(ov::intel_gpu::max_dynamic_batch(batch_dim.begin()->second.second)); } } int m_bv_sz = GetMaxBatchSizeForSingleProgram(); - m_max_batch = m_config.max_dynamic_batch; + m_max_batch = m_config.get_property(ov::intel_gpu::max_dynamic_batch); - if (dyn_shape_batch_found || config.max_dynamic_batch > 1) { + if (dyn_shape_batch_found || m_max_batch > 1) { // compile log2 networks to serve dynamic batch requests for (int b = m_bv_sz - 1; b >= 0; b--) { inputLayouts.clear(); @@ -188,8 +222,8 @@ Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptrreshape(new_shapes); { - auto deviceInfo = engine->get_device_info(); - TransformationsPipeline transformations(config, deviceInfo); + auto deviceInfo = engine.get_device_info(); + TransformationsPipeline transformations(m_config, deviceInfo); transformations.apply(new_func); } @@ -275,9 +309,10 @@ Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr 1) { + auto max_dynamic_batch = m_config.get_property(ov::intel_gpu::max_dynamic_batch); + if (max_dynamic_batch > 1) { // calculate number of networks necessary based on binary log - unsigned int tmp = m_config.max_dynamic_batch; + unsigned int tmp = max_dynamic_batch; unsigned int mask = 1U << 31; unsigned int ldigit = 31; @@ -324,7 +359,6 @@ std::shared_ptr Program::BuildProgram(const std::vectoris_dynamic()) { @@ -333,11 +367,10 @@ std::shared_ptr Program::BuildProgram(const std::vector Program::BuildProgram(const std::vectorget_type_info(); while (op_type_info != nullptr) { - auto customLayer = m_config.customLayers.find(op->get_type_name()); - if (customLayer != 
m_config.customLayers.end()) { + auto customLayer = m_custom_layers.find(op->get_type_name()); + if (customLayer != m_custom_layers.end()) { CreateCustomOp(*this, op, customLayer->second); return; } @@ -488,7 +521,7 @@ void Program::add_primitive(const ngraph::Node& op, std::shared_ptrorigin_op_type_name = prim->type_string(); } - if (this->m_config.useProfiling && should_profile) { + if (this->m_config.get_property(ov::enable_profiling) && should_profile) { profiling_ids.push_back(prim_id); init_profile_info(*prim); } diff --git a/src/plugins/intel_gpu/src/plugin/remote_allocators.cpp b/src/plugins/intel_gpu/src/plugin/remote_allocators.cpp new file mode 100644 index 00000000000..c558f8490c8 --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/remote_allocators.cpp @@ -0,0 +1,68 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include "intel_gpu/plugin/remote_allocators.hpp" +#include "intel_gpu/plugin/remote_blob.hpp" + +using namespace InferenceEngine; +using namespace InferenceEngine::gpu; +using namespace InferenceEngine::details; + +namespace ov { +namespace intel_gpu { + +void RemoteAllocator::regLockedBlob(void* handle, const RemoteBlobImpl* blob) { + std::lock_guard locker(*this); + auto iter = m_lockedBlobs.find(handle); + if (iter == m_lockedBlobs.end()) { + m_lockedBlobs.emplace(handle, blob); + } +} + +void RemoteAllocator::unlock(void* handle) noexcept { + std::lock_guard locker(*this); + auto iter = m_lockedBlobs.find(handle); + if (iter != m_lockedBlobs.end()) { + iter->second->unlock(); + m_lockedBlobs.erase(iter); + } +} + +void* USMHostAllocator::lock(void* handle, InferenceEngine::LockOp) noexcept { + if (!_usm_host_blob) + return nullptr; + try { + return _usm_host_blob->get(); + } catch (...) { + return nullptr; + } +}; + +void USMHostAllocator::unlock(void* handle) noexcept {} + +void* USMHostAllocator::alloc(size_t size) noexcept { + try { + auto td = TensorDesc(Precision::U8, SizeVector{size}, InferenceEngine::Layout::C); + ParamMap params = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(USM_HOST_BUFFER)}}; + _usm_host_blob = std::dynamic_pointer_cast(_context->CreateBlob(td, params)); + _usm_host_blob->allocate(); + if (!getBlobImpl(_usm_host_blob.get())->is_allocated()) { + return nullptr; + } + return _usm_host_blob->get(); + } catch (...) { + return nullptr; + } +} + +bool USMHostAllocator::free(void* handle) noexcept { + try { + _usm_host_blob = nullptr; + } catch(...) 
{ } + return true; +} + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/remote_blob.cpp b/src/plugins/intel_gpu/src/plugin/remote_blob.cpp new file mode 100644 index 00000000000..454a2f83fed --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/remote_blob.cpp @@ -0,0 +1,285 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include "intel_gpu/plugin/remote_context.hpp" +#include "intel_gpu/plugin/remote_blob.hpp" +#include "intel_gpu/plugin/remote_allocators.hpp" +#include "intel_gpu/plugin/plugin.hpp" +#include "intel_gpu/runtime/itt.hpp" +#include "intel_gpu/runtime/device_query.hpp" + +using namespace InferenceEngine; +using namespace InferenceEngine::gpu; +using namespace InferenceEngine::details; + +namespace ov { +namespace intel_gpu { + +RemoteBlobImpl::RemoteBlobImpl(InferenceEngine::gpu::ClContext::Ptr context, + cldnn::stream& stream, + const cldnn::layout& layout, + cldnn::shared_handle mem, + cldnn::shared_surface surf, + uint32_t plane, + BlobType mem_type) + : m_allocator(std::make_shared()) + , m_context(context) + , m_stream(stream) + , m_mem(mem) + , m_surf(surf) + , m_plane(plane) + , m_layout(layout) + , m_mem_type(mem_type) + , m_memory_object(nullptr) + , lockedCounter(0) + , lockedHolder(nullptr) + , _handle(nullptr) { + if (supports_caching()) { + m_hash = cldnn::hash_combine(0, m_mem); + m_hash = cldnn::hash_combine(m_hash, m_surf); + m_hash = cldnn::hash_combine(m_hash, plane); + m_hash = cldnn::hash_combine(m_hash, static_cast::type>(layout.format)); + m_hash = cldnn::hash_combine(m_hash, static_cast::type>(layout.data_type)); + for (auto& d : layout.get_shape()) { + m_hash = cldnn::hash_combine(m_hash, d); + } + } +} + +AnyMap RemoteBlobImpl::getParams() const { + OPENVINO_ASSERT(is_allocated(), "[GPU] Can't get RemoteBlob params as blob wasn't allocated properly"); + auto params = m_memory_object->get_internal_params(); + + switch (m_mem_type) { + case BlobType::BT_BUF_INTERNAL: + case BlobType::BT_BUF_SHARED: + return{ + { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_BUFFER) }, + { GPU_PARAM_KEY(OCL_CONTEXT), params.context }, + { GPU_PARAM_KEY(MEM_HANDLE), params.mem } + }; + case BlobType::BT_USM_SHARED: + return{ + { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(USM_USER_BUFFER) }, + { GPU_PARAM_KEY(OCL_CONTEXT), params.context }, + { GPU_PARAM_KEY(MEM_HANDLE), params.mem } + }; + case BlobType::BT_USM_HOST_INTERNAL: + return{ + { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(USM_HOST_BUFFER) }, + { GPU_PARAM_KEY(OCL_CONTEXT), params.context }, + { GPU_PARAM_KEY(MEM_HANDLE), params.mem } + }; + case BlobType::BT_USM_DEVICE_INTERNAL: + return{ + { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(USM_DEVICE_BUFFER) }, + { GPU_PARAM_KEY(OCL_CONTEXT), params.context }, + { GPU_PARAM_KEY(MEM_HANDLE), params.mem } + }; +#ifdef _WIN32 + case BlobType::BT_DX_BUF_SHARED: + return{ + { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(DX_BUFFER) }, + { GPU_PARAM_KEY(OCL_CONTEXT), params.context }, + { GPU_PARAM_KEY(VA_DEVICE), params.user_device }, + { GPU_PARAM_KEY(MEM_HANDLE), params.mem }, + { GPU_PARAM_KEY(DEV_OBJECT_HANDLE), params.surface } + }; +#endif + case BlobType::BT_IMG_SHARED: + return{ + { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_IMAGE2D) }, + { GPU_PARAM_KEY(OCL_CONTEXT), params.context }, + { GPU_PARAM_KEY(MEM_HANDLE), params.mem } + }; + case BlobType::BT_SURF_SHARED: + return{ + { GPU_PARAM_KEY(SHARED_MEM_TYPE), 
GPU_PARAM_VALUE(VA_SURFACE) }, + { GPU_PARAM_KEY(OCL_CONTEXT), params.context }, + { GPU_PARAM_KEY(VA_DEVICE), params.user_device }, + { GPU_PARAM_KEY(MEM_HANDLE), params.mem }, + { GPU_PARAM_KEY(DEV_OBJECT_HANDLE), params.surface }, + { GPU_PARAM_KEY(VA_PLANE), params.plane } + }; + default: + IE_THROW() << "Unsupported shared object type " << static_cast(m_mem_type); + } +} + +bool RemoteBlobImpl::deallocate() noexcept { + m_memory_object.reset(); + return m_memory_object == nullptr; +} + +bool RemoteBlobImpl::is_allocated() const noexcept { + return m_memory_object != nullptr; +} + +bool RemoteBlobImpl::is_locked() const noexcept { + return lockedHolder != nullptr; +} + +void RemoteBlobImpl::allocate() { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "RemoteBlobImpl::Allocate"); + + auto context = get_context_impl(m_context); + auto enable_caching = supports_caching(); + + if (enable_caching) { + m_memory_object = context->try_get_cached_memory(m_hash); + if (m_memory_object) + return; + } + + + auto& engine = context->get_engine(); + + switch (m_mem_type) { + case BlobType::BT_BUF_INTERNAL: { + m_memory_object = engine.allocate_memory(m_layout, cldnn::allocation_type::cl_mem); + break; + } + case BlobType::BT_USM_HOST_INTERNAL: { + m_memory_object = engine.allocate_memory(m_layout, cldnn::allocation_type::usm_host); + break; + } + case BlobType::BT_USM_DEVICE_INTERNAL: { + m_memory_object = engine.allocate_memory(m_layout, cldnn::allocation_type::usm_device); + break; + } + case BlobType::BT_BUF_SHARED: { + m_memory_object = engine.share_buffer(m_layout, m_mem); + break; + } + case BlobType::BT_USM_SHARED: { + m_memory_object = engine.share_usm(m_layout, m_mem); + break; + } +#ifdef _WIN32 + case BlobType::BT_SURF_SHARED: { + m_memory_object = engine.share_surface(m_layout, m_mem, m_plane); + break; + } + case BlobType::BT_DX_BUF_SHARED: { + m_memory_object = engine.share_dx_buffer(m_layout, m_mem); + break; + } +#else + case BlobType::BT_SURF_SHARED: { + m_memory_object = engine.share_surface(m_layout, m_surf, m_plane); + break; + } +#endif + case BlobType::BT_IMG_SHARED: { + m_memory_object = engine.share_image(m_layout, m_mem); + break; + } + default: + m_memory_object.reset(); + } + + if (enable_caching) + context->add_to_cache(m_hash, m_memory_object); +} + +const std::shared_ptr& RemoteBlobImpl::getAllocator() const noexcept { + return m_allocator; +}; + +std::string RemoteBlobImpl::getDeviceName() const noexcept { + return m_context->getDeviceName(); +}; + +std::shared_ptr RemoteBlobImpl::getContext() const noexcept { + return m_context; +} + +void RemoteBlobImpl::reinterpret(cldnn::layout new_layout) { + OPENVINO_ASSERT(m_layout.bytes_count() >= new_layout.bytes_count(), + "[GPU] Can't reinterpret blob to the size bigger than allocated memory buffer"); + m_layout = new_layout; + auto engine = m_memory_object->get_engine(); + m_memory_object = engine->reinterpret_buffer(*m_memory_object, new_layout); +} + +void RemoteBlobImpl::lock() const { + if (!is_allocated()) { + IE_THROW(NotAllocated) << "[GPU] Remote blob can't be locked as it's not allocated"; + } + + std::lock_guard locker(lockedMutex); + if (lockedCounter == 0) { + lockedHolder = std::unique_ptr>(new cldnn::mem_lock(m_memory_object, m_stream)); + auto ptr = lockedHolder->data(); + _handle = reinterpret_cast(ptr); + auto casted_allocator = std::dynamic_pointer_cast(m_allocator); + OPENVINO_ASSERT(casted_allocator, "[GPU] Invalid remote allocator type"); + casted_allocator->regLockedBlob(_handle, this); + } + 
lockedCounter++; +} + +void RemoteBlobImpl::unlock() const { + std::lock_guard locker(lockedMutex); + lockedCounter--; + if (lockedCounter == 0) + lockedHolder.reset(); +} + +LockedMemory RemoteBlobImpl::buffer() noexcept { + try { + lock(); + return LockedMemory(m_allocator.get(), _handle, 0); + } catch (...) { + return LockedMemory(nullptr, nullptr, 0); + } +} + +LockedMemory RemoteBlobImpl::cbuffer() const noexcept { + try { + lock(); + return LockedMemory(m_allocator.get(), _handle, 0); + } catch (...) { + return LockedMemory(nullptr, nullptr, 0); + } +} + +LockedMemory RemoteBlobImpl::rwmap() noexcept { + try { + lock(); + return LockedMemory(m_allocator.get(), _handle, 0); + } catch (...) { + return LockedMemory(nullptr, nullptr, 0); + } +} + +LockedMemory RemoteBlobImpl::rmap() const noexcept { + try { + lock(); + return LockedMemory(m_allocator.get(), _handle, 0); + } catch (...) { + return LockedMemory(nullptr, nullptr, 0); + } +} + +LockedMemory RemoteBlobImpl::wmap() noexcept { + try { + lock(); + return LockedMemory(m_allocator.get(), _handle, 0); + } catch (...) { + return LockedMemory(nullptr, nullptr, 0); + } +} + +bool RemoteBlobImpl::supports_caching() const { + return m_mem_type == BlobType::BT_BUF_SHARED || + m_mem_type == BlobType::BT_USM_SHARED || + m_mem_type == BlobType::BT_IMG_SHARED || + m_mem_type == BlobType::BT_SURF_SHARED || + m_mem_type == BlobType::BT_DX_BUF_SHARED; +} + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/remote_context.cpp b/src/plugins/intel_gpu/src/plugin/remote_context.cpp index 368c88e49d8..e1eb0fb206f 100644 --- a/src/plugins/intel_gpu/src/plugin/remote_context.cpp +++ b/src/plugins/intel_gpu/src/plugin/remote_context.cpp @@ -4,7 +4,8 @@ #include #include "intel_gpu/plugin/remote_context.hpp" -#include "intel_gpu/plugin/plugin.hpp" +#include "intel_gpu/plugin/remote_blob.hpp" +#include "intel_gpu/plugin/remote_allocators.hpp" #include "intel_gpu/runtime/itt.hpp" #include "intel_gpu/runtime/device_query.hpp" @@ -14,285 +15,28 @@ using namespace InferenceEngine::details; namespace ov { namespace intel_gpu { -RemoteAllocator RemoteBlobImpl::m_allocator; -RemoteBlobImpl::RemoteBlobImpl(ClContext::Ptr context, - cldnn::stream& stream, - const cldnn::layout& layout, - cldnn::shared_handle mem, - cldnn::shared_surface surf, - uint32_t plane, - BlobType mem_type) - : m_context(context) - , m_stream(stream) - , m_mem(mem) - , m_surf(surf) - , m_plane(plane) - , m_layout(layout) - , m_mem_type(mem_type) - , m_memObject(nullptr) - , lockedCounter(0) - , lockedHolder(nullptr) - , _handle(nullptr) - , _allocator(nullptr) { - auto _impl = getContextImpl(m_context.lock()); - m_engine = _impl->GetEngine(); - - // Verify shared buffer/usm memory and ensure that requested byte size is not greater than allocated one - switch (m_mem_type) { - case BlobType::BT_BUF_SHARED: { - m_engine->share_buffer(m_layout, m_mem); - break; - } - case BlobType::BT_USM_SHARED: { - m_engine->share_usm(m_layout, m_mem); - break; - } - default: break; - } -} - -AnyMap RemoteBlobImpl::getParams() const { - assert(m_memObject != nullptr); - auto params = m_memObject->get_internal_params(); - - switch (m_mem_type) { - case BT_BUF_INTERNAL: - case BT_BUF_SHARED: - return{ - { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_BUFFER) }, - { GPU_PARAM_KEY(OCL_CONTEXT), params.context }, - { GPU_PARAM_KEY(MEM_HANDLE), params.mem } - }; - case BT_USM_SHARED: - return{ - { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(USM_USER_BUFFER) }, - { 
GPU_PARAM_KEY(OCL_CONTEXT), params.context }, - { GPU_PARAM_KEY(MEM_HANDLE), params.mem } - }; - case BT_USM_HOST_INTERNAL: - return{ - { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(USM_HOST_BUFFER) }, - { GPU_PARAM_KEY(OCL_CONTEXT), params.context }, - { GPU_PARAM_KEY(MEM_HANDLE), params.mem } - }; - case BT_USM_DEVICE_INTERNAL: - return{ - { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(USM_DEVICE_BUFFER) }, - { GPU_PARAM_KEY(OCL_CONTEXT), params.context }, - { GPU_PARAM_KEY(MEM_HANDLE), params.mem } - }; -#ifdef _WIN32 - case BT_DX_BUF_SHARED: - return{ - { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(DX_BUFFER) }, - { GPU_PARAM_KEY(OCL_CONTEXT), params.context }, - { GPU_PARAM_KEY(VA_DEVICE), params.user_device }, - { GPU_PARAM_KEY(MEM_HANDLE), params.mem }, - { GPU_PARAM_KEY(DEV_OBJECT_HANDLE), params.surface } - }; -#endif - case BT_IMG_SHARED: - return{ - { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_IMAGE2D) }, - { GPU_PARAM_KEY(OCL_CONTEXT), params.context }, - { GPU_PARAM_KEY(MEM_HANDLE), params.mem } - }; - case BT_SURF_SHARED: - return{ - { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE) }, - { GPU_PARAM_KEY(OCL_CONTEXT), params.context }, - { GPU_PARAM_KEY(VA_DEVICE), params.user_device }, - { GPU_PARAM_KEY(MEM_HANDLE), params.mem }, - { GPU_PARAM_KEY(DEV_OBJECT_HANDLE), params.surface }, - { GPU_PARAM_KEY(VA_PLANE), params.plane } - }; - default: - IE_THROW() << "Unsupported shared object type " << m_mem_type; - } -} - -bool RemoteBlobImpl::deallocate() noexcept { - m_memObject.reset(); - return m_memObject == nullptr; -} - -bool RemoteBlobImpl::is_allocated() const noexcept { - return m_memObject != nullptr; -} - -bool RemoteBlobImpl::is_locked() const noexcept { - return lockedHolder != nullptr; -} - -void RemoteBlobImpl::allocate() { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "RemoteBlobImpl::Allocate"); - assert(m_memObject == nullptr); - - auto _impl = getContextImpl(m_context.lock()); - std::lock_guard locker(*_impl); - - switch (m_mem_type) { - case BlobType::BT_BUF_INTERNAL: { - m_memObject = m_engine->allocate_memory(m_layout, cldnn::allocation_type::cl_mem); - break; - } - case BlobType::BT_USM_HOST_INTERNAL: { - m_memObject = m_engine->allocate_memory(m_layout, cldnn::allocation_type::usm_host); - break; - } - case BlobType::BT_USM_DEVICE_INTERNAL: { - m_memObject = m_engine->allocate_memory(m_layout, cldnn::allocation_type::usm_device); - break; - } - case BlobType::BT_BUF_SHARED: { - m_memObject = m_engine->share_buffer(m_layout, m_mem); - break; - } - case BlobType::BT_USM_SHARED: { - m_memObject = m_engine->share_usm(m_layout, m_mem); - break; - } -#ifdef _WIN32 - case BlobType::BT_SURF_SHARED: { - m_memObject = m_engine->share_surface(m_layout, m_mem, m_plane); - break; - } - case BlobType::BT_DX_BUF_SHARED: { - m_memObject = m_engine->share_dx_buffer(m_layout, m_mem); - break; - } -#else - case BlobType::BT_SURF_SHARED: { - m_memObject = m_engine->share_surface(m_layout, m_surf, m_plane); - break; - } -#endif - case BlobType::BT_IMG_SHARED: { - m_memObject = m_engine->share_image(m_layout, m_mem); - break; - } - default: - m_memObject.reset(); - } -} - -const std::shared_ptr& RemoteBlobImpl::getAllocator() const noexcept { - if (!_allocator) { - _allocator = std::shared_ptr(&m_allocator, [] (IAllocator*) {}); - } - return _allocator; -}; - -std::string RemoteBlobImpl::getDeviceName() const noexcept { - return getContextImpl(m_context.lock())->getDeviceName(); -}; - -std::shared_ptr RemoteBlobImpl::getContext() const 
noexcept { - return m_context.lock(); -} - -void RemoteBlobImpl::reinterpret(cldnn::layout new_layout) { - OPENVINO_ASSERT(m_layout.bytes_count() >= new_layout.bytes_count(), - "[GPU] Can't reinterpret blob to the size bigger than allocated memory buffer"); - m_layout = new_layout; - auto engine = m_memObject->get_engine(); - m_memObject = engine->reinterpret_buffer(*m_memObject, new_layout); -} - -void RemoteBlobImpl::lock() const { - if (!is_allocated()) { - IE_THROW(NotAllocated) << "[GPU] Remote blob can't be locked as it's not allocated"; - } - - std::lock_guard locker(lockedMutex); - if (lockedCounter == 0) { - lockedHolder = std::unique_ptr>(new cldnn::mem_lock(m_memObject, m_stream)); - auto ptr = lockedHolder->data(); - _handle = reinterpret_cast(ptr); - m_allocator.regLockedBlob(_handle, this); - } - lockedCounter++; -} - -void RemoteBlobImpl::unlock() const { - std::lock_guard locker(lockedMutex); - lockedCounter--; - if (lockedCounter == 0) - lockedHolder.reset(); -} - -LockedMemory RemoteBlobImpl::buffer() noexcept { - try { - lock(); - return LockedMemory(reinterpret_cast(&m_allocator), _handle, 0); - } catch (...) { - return LockedMemory(nullptr, nullptr, 0); - } -} - -LockedMemory RemoteBlobImpl::cbuffer() const noexcept { - try { - lock(); - return LockedMemory(reinterpret_cast(&m_allocator), _handle, 0); - } catch (...) { - return LockedMemory(nullptr, nullptr, 0); - } -} - -LockedMemory RemoteBlobImpl::rwmap() noexcept { - try { - lock(); - return LockedMemory(reinterpret_cast(&m_allocator), _handle, 0); - } catch (...) { - return LockedMemory(nullptr, nullptr, 0); - } -} - -LockedMemory RemoteBlobImpl::rmap() const noexcept { - try { - lock(); - return LockedMemory(reinterpret_cast(&m_allocator), _handle, 0); - } catch (...) { - return LockedMemory(nullptr, nullptr, 0); - } -} - -LockedMemory RemoteBlobImpl::wmap() noexcept { - try { - lock(); - return LockedMemory(reinterpret_cast(&m_allocator), _handle, 0); - } catch (...) 
{ - return LockedMemory(nullptr, nullptr, 0); - } -} - -void RemoteAllocator::regLockedBlob(void* handle, const RemoteBlobImpl* blob) { - std::lock_guard locker(*this); - auto iter = m_lockedBlobs.find(handle); - if (iter == m_lockedBlobs.end()) { - m_lockedBlobs.emplace(handle, blob); - } -} - -void RemoteAllocator::unlock(void* handle) noexcept { - std::lock_guard locker(*this); - auto iter = m_lockedBlobs.find(handle); - if (iter != m_lockedBlobs.end()) { - iter->second->unlock(); - m_lockedBlobs.erase(iter); - } -} - -ExecutionContextImpl::ExecutionContextImpl(const std::shared_ptr plugin, - const AnyMap& params, - const Config& config) +RemoteContextImpl::RemoteContextImpl(std::string device_name, std::vector devices) : m_va_display(nullptr) , m_external_queue(nullptr) - , m_config(config) , m_type(ContextType::OCL) - , m_plugin(plugin) { - m_lock.clear(std::memory_order_relaxed); + , m_device_name(device_name) + , m_memory_cache(cache_capacity) { + OPENVINO_ASSERT(devices.size() == 1, "[GPU] Currently context can be created for single device only"); + // TODO: Parameterize this based on plugin config and compilation options + auto engine_type = cldnn::engine_types::ocl; + auto runtime_type = cldnn::runtime_types::ocl; + + m_engine = cldnn::engine::create(engine_type, runtime_type, devices.front()); + + GPU_DEBUG_LOG << "Initialize RemoteContext for " << m_device_name << " (" << m_engine->get_device_info().dev_name << ")" << std::endl; +} + +RemoteContextImpl::RemoteContextImpl(const std::vector& known_contexts, const AnyMap& params) + : m_va_display(nullptr) + , m_external_queue(nullptr) + , m_type(ContextType::OCL) + , m_memory_cache(cache_capacity) { gpu_handle_param _context_id = nullptr; gpu_handle_param _va_device = nullptr; int ctx_device_id = 0; @@ -300,18 +44,18 @@ ExecutionContextImpl::ExecutionContextImpl(const std::shared_ptr(params, GPU_PARAM_KEY(CONTEXT_TYPE)); if (GPU_PARAM_VALUE(OCL) == contextTypeStr) { - _context_id = _ObjFromParamSimple(params, GPU_PARAM_KEY(OCL_CONTEXT)); + _context_id = extract_object(params, GPU_PARAM_KEY(OCL_CONTEXT)); if (params.find(GPU_PARAM_KEY(OCL_QUEUE)) != params.end()) - m_external_queue = _ObjFromParamSimple(params, GPU_PARAM_KEY(OCL_QUEUE)); + m_external_queue = extract_object(params, GPU_PARAM_KEY(OCL_QUEUE)); if (params.find(GPU_PARAM_KEY(OCL_CONTEXT_DEVICE_ID)) != params.end()) - ctx_device_id = _ObjFromParamSimple(params, GPU_PARAM_KEY(OCL_CONTEXT_DEVICE_ID)); + ctx_device_id = extract_object(params, GPU_PARAM_KEY(OCL_CONTEXT_DEVICE_ID)); } else if (GPU_PARAM_VALUE(VA_SHARED) == contextTypeStr) { - m_va_display = _va_device = _ObjFromParamSimple(params, GPU_PARAM_KEY(VA_DEVICE)); + m_va_display = _va_device = extract_object(params, GPU_PARAM_KEY(VA_DEVICE)); m_type = ContextType::DEV_SHARED; } else { IE_THROW() << "Invalid execution context type" << contextTypeStr; @@ -329,29 +73,15 @@ ExecutionContextImpl::ExecutionContextImpl(const std::shared_ptrsecond; + OPENVINO_ASSERT(device_map.size() == 1, "[GPU] Only one device expected in case of context sharing"); - auto engine_params = Plugin::GetParams(m_config, dev, m_external_queue); - m_engine = cldnn::engine::create(engine_params.engine_type, - engine_params.runtime_type, dev, - cldnn::engine_configuration(m_config.useProfiling, - engine_params.queue_type, - std::string(), - m_config.queuePriority, - m_config.queueThrottle, - true, - engine_params.use_unified_shared_memory, - m_config.kernels_cache_dir, - m_config.throughput_streams), - engine_params.task_executor); + m_engine = 
cldnn::engine::create(engine_type, runtime_type, device_map.begin()->second); + m_device_name = get_device_name(known_contexts, m_engine->get_device()); + + GPU_DEBUG_LOG << "Initialize RemoteContext for " << m_device_name << " (" << m_engine->get_device_info().dev_name << ")" << std::endl; } -AnyMap ExecutionContextImpl::getParams() const { +AnyMap RemoteContextImpl::get_params() const { AnyMap ret = { { GPU_PARAM_KEY(OCL_CONTEXT), m_engine->get_user_context() } }; switch (m_type) { @@ -370,26 +100,191 @@ AnyMap ExecutionContextImpl::getParams() const { return ret; } -std::string ExecutionContextImpl::getDeviceName() const noexcept { - auto devName = m_plugin.lock()->GetName(); - - auto engine_type = cldnn::engine_types::ocl; - auto runtime_type = cldnn::runtime_types::ocl; - try { - // Use actual runtime and engine types - cldnn::device_query device_query(engine_type, runtime_type); - auto all_devices = device_query.get_available_devices(); - auto current_device = m_engine->get_device(); - - for (auto& kv : all_devices) { - if (current_device->is_same(kv.second)) - return devName + "." + kv.first; +// For external contexts we try to match underlying handles with default contexts created by plugin to find device name +std::string RemoteContextImpl::get_device_name(const std::vector& known_contexts, + const cldnn::device::ptr current_device) { + std::string device_name = "GPU"; + for (auto& c : known_contexts) { + if (c->get_engine().get_device()->is_same(current_device)) { + device_name = c->get_device_name(); + break; } - } catch (...) { } + } + return device_name; +} - if (!m_config.device_id.empty()) - devName += "." + m_config.device_id; - return devName; +std::string RemoteContextImpl::get_device_name() const noexcept { + return m_device_name; +} + +cldnn::memory::ptr RemoteContextImpl::try_get_cached_memory(size_t hash) { + std::lock_guard lock(m_cache_mutex); + if (m_memory_cache.has(hash)) + return m_memory_cache.get(hash); + + return nullptr; +} + +void RemoteContextImpl::add_to_cache(size_t hash, cldnn::memory::ptr memory) { + std::lock_guard lock(m_cache_mutex); + m_memory_cache.add(hash, memory); +} + +InferenceEngine::RemoteBlob::Ptr RemoteContextImpl::reuse_surface(InferenceEngine::gpu::ClContext::Ptr public_context, + const InferenceEngine::TensorDesc& desc, + const InferenceEngine::ParamMap& params) { + using namespace InferenceEngine; + auto& stream = m_engine->get_service_stream(); + uint32_t plane = extract_object(params, GPU_PARAM_KEY(VA_PLANE)); +#ifdef _WIN32 + cldnn::shared_handle surf = extract_object(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE)); +#else + cldnn::shared_surface surf = extract_object(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE)); +#endif + + cldnn::layout layout(DataTypeFromPrecision(desc.getPrecision()), + ImageFormatFromLayout(desc.getLayout()), + tensor_from_dims(desc.getDims())); + +#ifdef _WIN32 + auto blob = std::make_shared(public_context, stream, + desc, layout, surf, 0, plane, + BlobType::BT_SURF_SHARED); +#else + auto blob = std::make_shared(public_context, stream, + desc, layout, nullptr, surf, plane, + BlobType::BT_SURF_SHARED); +#endif + + return blob; +} + +InferenceEngine::RemoteBlob::Ptr RemoteContextImpl::reuse_memory(InferenceEngine::gpu::ClContext::Ptr public_context, + const InferenceEngine::TensorDesc& desc, + cldnn::shared_handle mem, + BlobType blob_type) { + auto& stream = m_engine->get_service_stream(); + + cldnn::layout layout(DataTypeFromPrecision(desc.getPrecision()), + FormatFromLayout(desc.getLayout()), + 
                         tensor_from_dims(desc.getDims()));
+
+    switch (blob_type) {
+    case BlobType::BT_BUF_SHARED: {
+        return std::make_shared(public_context, stream, desc, layout, mem, 0, 0, blob_type);
+    }
+    case BlobType::BT_USM_SHARED: {
+        return std::make_shared(public_context, stream, desc, layout, mem, 0, 0, blob_type);
+    }
+    case BlobType::BT_IMG_SHARED: {
+        layout.format = ImageFormatFromLayout(desc.getLayout());
+        return std::make_shared(public_context, stream, desc, layout, mem, 0, 0, blob_type);
+    }
+#ifdef _WIN32
+    case BlobType::BT_DX_BUF_SHARED: {
+        return std::make_shared(public_context, stream, desc, layout, mem, 0, 0, blob_type);
+    }
+#endif
+    default:
+        break;
+    }
+
+    return nullptr;
+}
+
+InferenceEngine::RemoteBlob::Ptr RemoteContextImpl::create_buffer(InferenceEngine::gpu::ClContext::Ptr public_context,
+                                                                  const InferenceEngine::TensorDesc& desc) {
+    cldnn::layout layout(DataTypeFromPrecision(desc.getPrecision()),
+                         FormatFromLayout(desc.getLayout()),
+                         tensor_from_dims(desc.getDims()));
+    auto& stream = m_engine->get_service_stream();
+    return std::make_shared(public_context,
+                            stream,
+                            desc,
+                            layout,
+                            nullptr, 0, 0,
+                            BlobType::BT_BUF_INTERNAL);
+}
+
+InferenceEngine::RemoteBlob::Ptr RemoteContextImpl::create_usm(InferenceEngine::gpu::ClContext::Ptr public_context,
+                                                               const InferenceEngine::TensorDesc& desc,
+                                                               BlobType alloc_type) {
+    cldnn::layout layout(DataTypeFromPrecision(desc.getPrecision()),
+                         FormatFromLayout(desc.getLayout()),
+                         tensor_from_dims(desc.getDims()));
+    auto& stream = m_engine->get_service_stream();
+
+    return std::make_shared(public_context,
+                            stream,
+                            desc,
+                            layout,
+                            nullptr, 0, 0,
+                            alloc_type);
+}
+
+void RemoteContextImpl::check_if_shared() {
+    OPENVINO_ASSERT(m_type == RemoteContextImpl::ContextType::DEV_SHARED, "[GPU] Shared context is required to share this type of memory");
+}
+
+InferenceEngine::MemoryBlob::Ptr RemoteContextImpl::create_host_blob(InferenceEngine::gpu::ClContext::Ptr public_context,
+                                                                     const InferenceEngine::TensorDesc& desc) {
+    if (m_engine->use_unified_shared_memory())
+        return std::dynamic_pointer_cast<InferenceEngine::MemoryBlob>(make_blob_with_precision(desc, std::make_shared<USMHostAllocator>(public_context)));
+    else
+        return std::dynamic_pointer_cast<InferenceEngine::MemoryBlob>(make_blob_with_precision(desc));
+}
+
+InferenceEngine::RemoteBlob::Ptr RemoteContextImpl::create_blob(InferenceEngine::gpu::ClContext::Ptr public_context,
+                                                                const InferenceEngine::TensorDesc& desc,
+                                                                const InferenceEngine::ParamMap& params) {
+    using namespace InferenceEngine;
+    if (params.empty()) {
+        // user wants plugin to allocate blob by itself and return handle
+        return create_buffer(public_context, desc);
+    } else {
+        // user will supply shared object handle
+        std::string mem_type = extract_object<std::string>(params, GPU_PARAM_KEY(SHARED_MEM_TYPE));
+
+        bool is_usm = mem_type == GPU_PARAM_VALUE(USM_HOST_BUFFER) ||
+                      mem_type == GPU_PARAM_VALUE(USM_DEVICE_BUFFER) ||
+                      mem_type == GPU_PARAM_VALUE(USM_USER_BUFFER);
+
+        OPENVINO_ASSERT(!is_usm || m_engine->use_unified_shared_memory(),
+                        "[GPU] Can't create USM tensor as USM is not supported (or manually disabled) on current device");
+
+        if (GPU_PARAM_VALUE(VA_SURFACE) == mem_type) {
+            check_if_shared();
+            return reuse_surface(public_context, desc, params);
+        } else if (GPU_PARAM_VALUE(USM_HOST_BUFFER) == mem_type) {
+            return create_usm(public_context, desc, BlobType::BT_USM_HOST_INTERNAL);
+        } else if (GPU_PARAM_VALUE(USM_DEVICE_BUFFER) == mem_type) {
+            return create_usm(public_context, desc, BlobType::BT_USM_DEVICE_INTERNAL);
+        } else {
+            BlobType blob_type;
+            cldnn::shared_handle
mem = nullptr; + + if (GPU_PARAM_VALUE(OCL_BUFFER) == mem_type) { + blob_type = BlobType::BT_BUF_SHARED; + mem = extract_object(params, GPU_PARAM_KEY(MEM_HANDLE)); + } else if (GPU_PARAM_VALUE(USM_USER_BUFFER) == mem_type) { + blob_type = BlobType::BT_USM_SHARED; + mem = extract_object(params, GPU_PARAM_KEY(MEM_HANDLE)); + } else if (GPU_PARAM_VALUE(OCL_IMAGE2D) == mem_type) { + blob_type = BlobType::BT_IMG_SHARED; + mem = extract_object(params, GPU_PARAM_KEY(MEM_HANDLE)); +#ifdef _WIN32 + } else if (GPU_PARAM_VALUE(DX_BUFFER) == mem_type) { + blob_type = BlobType::BT_DX_BUF_SHARED; + mem = extract_object(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE)); + check_if_shared(); +#endif + } else { + OPENVINO_ASSERT(false, "[GPU] Unsupported shared object type ", mem_type); + } + + return reuse_memory(public_context, desc, mem, blob_type); + } + } } } // namespace intel_gpu diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index a5a36fa34d6..887c13da97d 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -127,11 +127,12 @@ void TransformationsPipeline::apply(std::shared_ptr func) { const auto defaultPrecisions = ngraph::pass::low_precision::precision_set::int8_support; bool enableInt8; + bool enable_loop_unrolling = config.get_property(ov::intel_gpu::enable_loop_unrolling); { ngraph::pass::Manager manager; manager.set_per_pass_validation(false); - enableInt8 = config.enableInt8 && ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(func); + enableInt8 = config.get_property(ov::intel_gpu::enable_lp_transformations) && ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(func); if (enableInt8) { manager.register_pass( std::vector{ ngraph::element::i8, ngraph::element::u8, ngraph::element::i4, ngraph::element::u4 }); @@ -144,7 +145,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); - if (!config.enable_loop_unrolling) { + if (!enable_loop_unrolling) { manager.register_pass(); manager.register_pass(); manager.register_pass(); @@ -158,7 +159,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); - if (config.enable_loop_unrolling) { + if (enable_loop_unrolling) { manager.register_pass(); manager.register_pass(); manager.register_pass(); @@ -205,14 +206,14 @@ void TransformationsPipeline::apply(std::shared_ptr func) { }; // Add conversion from FP data types to infer precision if it's specified - if (config.inference_precision != ov::element::undefined) { - auto inference_precision = config.inference_precision; - if (!fp_precision_supported(inference_precision)) - inference_precision = fallback_precision; + auto infer_precision = config.get_property(ov::hint::inference_precision); + if (infer_precision != ov::element::undefined) { + if (!fp_precision_supported(infer_precision)) + infer_precision = fallback_precision; for (auto& et : fp_element_types) { - if (et != inference_precision) { - convert_precision_list.push_back({et, inference_precision}); + if (et != infer_precision) { + convert_precision_list.push_back({et, infer_precision}); } } } @@ -330,7 +331,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { return isCellPrimitiveSupported(node); }); - if (config.enable_loop_unrolling) { + if (enable_loop_unrolling) { pass_config->set_callback( @@ -550,10 +551,10 @@ void 
TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); auto pass_config = manager.get_pass_config(); pass_config->set_callback( - [this](const std::shared_ptr &node) -> bool { + [enable_loop_unrolling](const std::shared_ptr &node) -> bool { auto sub_graph_op = std::dynamic_pointer_cast(node); int64_t num_iter = sub_graph_op->get_num_iterations(); - if (!config.enable_loop_unrolling) + if (!enable_loop_unrolling) return num_iter != 1; return num_iter >= 16; }); diff --git a/src/plugins/intel_gpu/src/plugin/variable_state.cpp b/src/plugins/intel_gpu/src/plugin/variable_state.cpp index 051863937ef..a3baefa1052 100644 --- a/src/plugins/intel_gpu/src/plugin/variable_state.cpp +++ b/src/plugins/intel_gpu/src/plugin/variable_state.cpp @@ -8,18 +8,17 @@ namespace ov { namespace intel_gpu { VariableState::VariableState(const std::string &name, - const std::vector &states, - std::shared_ptr engine, int currentBatch) : - InferenceEngine::IVariableStateInternal {name}, - currentBatch_ {currentBatch}, - states_ {states}, - desc_{ + const std::vector &states, + cldnn::engine& engine, int currentBatch) + : InferenceEngine::IVariableStateInternal {name} + , currentBatch_ {currentBatch} + , states_ {states} + , desc_ { PrecisionFromDataType(states.front()->memory->get_layout().data_type), AggregateShape(states.front()->memory->get_layout()), InferenceEngine::Layout::ANY - }, - engine_ {std::move(engine)} { -} + } + , engine_(engine) { } void VariableState::Reset() { IterateOverStates([this](cldnn::network::VariableState &state) { @@ -31,11 +30,11 @@ void VariableState::SetState(const InferenceEngine::Blob::Ptr &newState) { auto lock = std::dynamic_pointer_cast(newState)->rmap(); auto data = lock.as(); IterateOverStates([&data, this](cldnn::network::VariableState &state) { - state.memory->copy_from(engine_->get_program_stream(), data); + state.memory->copy_from(engine_.get_service_stream(), data); data += state.memory->get_layout().bytes_count(); state.is_set = true; }); - engine_->get_program_stream().enqueue_barrier(); + engine_.get_service_stream().enqueue_barrier(); } InferenceEngine::Blob::CPtr VariableState::GetState() const { @@ -44,7 +43,7 @@ InferenceEngine::Blob::CPtr VariableState::GetState() const { auto blobLock = std::dynamic_pointer_cast(blob)->wmap(); auto data = blobLock.as(); IterateOverStates([&data, this](cldnn::network::VariableState &state) { - cldnn::mem_lock lock { state.memory, engine_->get_program_stream() }; + cldnn::mem_lock lock { state.memory, engine_.get_service_stream() }; std::copy(lock.begin(), lock.end(), data); data += state.memory->get_layout().bytes_count(); }); diff --git a/src/plugins/intel_gpu/src/runtime/engine.cpp b/src/plugins/intel_gpu/src/runtime/engine.cpp index d66bedbab82..f3dc11bf617 100644 --- a/src/plugins/intel_gpu/src/runtime/engine.cpp +++ b/src/plugins/intel_gpu/src/runtime/engine.cpp @@ -56,10 +56,8 @@ static size_t get_cpu_ram_size() { namespace cldnn { -engine::engine(const device::ptr device, const engine_configuration& configuration, const InferenceEngine::ITaskExecutor::Ptr task_executor) -: _task_executor(task_executor) -, _device(device) -, _configuration(configuration) {} +engine::engine(const device::ptr device) + : _device(device) {} device_info engine::get_device_info() const { return _device->get_info(); @@ -74,7 +72,7 @@ bool engine::use_unified_shared_memory() const { GPU_DEBUG_IF(debug_config->disable_usm) { return false; } - if (_device->get_mem_caps().supports_usm() && _configuration.use_unified_shared_memory) 
{ + if (_device->get_mem_caps().supports_usm()) { return true; } return false; @@ -248,19 +246,11 @@ void engine::subtract_memory_used(uint64_t bytes, allocation_type type) { } } -const InferenceEngine::ITaskExecutor::Ptr engine::get_task_executor() { - return _task_executor; -} - -std::shared_ptr engine::create(engine_types engine_type, - runtime_types runtime_type, - const device::ptr device, - const engine_configuration& configuration, - const InferenceEngine::ITaskExecutor::Ptr task_executor) { +std::shared_ptr engine::create(engine_types engine_type, runtime_types runtime_type, const device::ptr device) { std::shared_ptr ret; switch (engine_type) { case engine_types::ocl: - ret = ocl::create_ocl_engine(device, runtime_type, configuration, task_executor); + ret = ocl::create_ocl_engine(device, runtime_type); break; default: throw std::runtime_error("Invalid engine type"); @@ -270,17 +260,14 @@ std::shared_ptr engine::create(engine_types engine_type, return ret; } -std::shared_ptr engine::create(engine_types engine_type, - runtime_types runtime_type, - const engine_configuration& configuration, - const InferenceEngine::ITaskExecutor::Ptr task_executor) { +std::shared_ptr engine::create(engine_types engine_type, runtime_types runtime_type) { device_query query(engine_type, runtime_type); auto devices = query.get_available_devices(); auto iter = devices.find(std::to_string(device_query::device_id)); auto& device = iter != devices.end() ? iter->second : devices.begin()->second; - return engine::create(engine_type, runtime_type, device, configuration, task_executor); + return engine::create(engine_type, runtime_type, device); } } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp new file mode 100644 index 00000000000..e7593e36b89 --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -0,0 +1,196 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "intel_gpu/runtime/execution_config.hpp" +#include "intel_gpu/runtime/debug_configuration.hpp" + +#include + +namespace ov { +namespace intel_gpu { + +ExecutionConfig::ExecutionConfig() { + set_default(); +} + +class InferencePrecisionValidator : public BaseValidator { +public: + bool is_valid(const ov::Any& v) const override { + auto precision = v.as(); + return precision == ov::element::f16 || precision == ov::element::f32; + } +}; + +class PerformanceModeValidator : public BaseValidator { +public: + bool is_valid(const ov::Any& v) const override { + auto mode = v.as(); + return mode == ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT || + mode == ov::hint::PerformanceMode::THROUGHPUT || + mode == ov::hint::PerformanceMode::LATENCY || + mode == ov::hint::PerformanceMode::UNDEFINED; + } +}; + +void ExecutionConfig::set_default() { + register_property( + std::make_tuple(ov::device::id, "0"), + std::make_tuple(ov::enable_profiling, false), + std::make_tuple(ov::cache_dir, ""), + std::make_tuple(ov::num_streams, 1), + std::make_tuple(ov::compilation_num_threads, std::max(1, static_cast(std::thread::hardware_concurrency()))), + std::make_tuple(ov::hint::inference_precision, ov::element::f16, InferencePrecisionValidator()), + std::make_tuple(ov::hint::model_priority, ov::hint::Priority::MEDIUM), + std::make_tuple(ov::hint::performance_mode, ov::hint::PerformanceMode::LATENCY, PerformanceModeValidator()), + std::make_tuple(ov::hint::num_requests, 0), + + 
std::make_tuple(ov::intel_gpu::hint::host_task_priority, ov::hint::Priority::MEDIUM), + std::make_tuple(ov::intel_gpu::hint::queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM), + std::make_tuple(ov::intel_gpu::hint::queue_priority, ov::hint::Priority::MEDIUM), + std::make_tuple(ov::intel_gpu::enable_loop_unrolling, true), + + // Legacy API properties + std::make_tuple(ov::intel_gpu::enable_dynamic_batch, false), + std::make_tuple(ov::intel_gpu::exclusive_async_requests, false), + std::make_tuple(ov::intel_gpu::nv12_two_inputs, false), + std::make_tuple(ov::intel_gpu::config_file, ""), + std::make_tuple(ov::intel_gpu::enable_lp_transformations, false)); + + register_property( + std::make_tuple(ov::intel_gpu::max_dynamic_batch, 1), + std::make_tuple(ov::intel_gpu::queue_type, QueueTypes::out_of_order), + std::make_tuple(ov::intel_gpu::optimize_data, false), + std::make_tuple(ov::intel_gpu::enable_memory_pool, true), + std::make_tuple(ov::intel_gpu::allow_static_input_reorder, false), + std::make_tuple(ov::intel_gpu::custom_outputs, std::vector{}), + std::make_tuple(ov::intel_gpu::tuning_config, ov::intel_gpu::TuningConfig{}), + std::make_tuple(ov::intel_gpu::dump_graphs, ""), + std::make_tuple(ov::intel_gpu::force_implementations, ImplForcingMap{}), + std::make_tuple(ov::intel_gpu::partial_build_program, false), + std::make_tuple(ov::intel_gpu::allow_new_shape_infer, false)); +} + +void ExecutionConfig::register_property_impl(const std::pair& property, PropertyVisibility visibility, BaseValidator::Ptr validator) { + property_validators[property.first] = validator; + supported_properties[property.first] = visibility; + internal_properties[property.first] = property.second; +} + +void ExecutionConfig::set_property(const AnyMap& config) { + for (auto& kv : config) { + auto& name = kv.first; + auto& val = kv.second; + OPENVINO_ASSERT(is_supported(kv.first), "[GPU] Attepmpt to set property ", name, " (", val.as(), ") which was not registered!\n"); + OPENVINO_ASSERT(property_validators.at(name)->is_valid(val), "[GPU] Invalid value for property ", name, ": ", val.as()); + internal_properties[name] = val; + } +} + +bool ExecutionConfig::is_supported(const std::string& name) const { + bool supported = supported_properties.find(name) != supported_properties.end(); + bool has_validator = property_validators.find(name) != property_validators.end(); + + return supported && has_validator; +} + +bool ExecutionConfig::is_set_by_user(const std::string& name) const { + return user_properties.find(name) != user_properties.end(); +} + +void ExecutionConfig::set_user_property(const AnyMap& config) { + for (auto& kv : config) { + auto& name = kv.first; + auto& val = kv.second; + bool supported = is_supported(name) && supported_properties.at(name) == PropertyVisibility::PUBLIC; + OPENVINO_ASSERT(supported, "[GPU] Attepmpt to set user property ", name, " (", val.as(), ") which was not registered or internal!\n"); + OPENVINO_ASSERT(property_validators.at(name)->is_valid(val), "[GPU] Invalid value for property ", name, ": `", val.as(), "`"); + + user_properties[kv.first] = kv.second; + } +} + +Any ExecutionConfig::get_property(const std::string& name) const { + if (user_properties.find(name) != user_properties.end()) { + return user_properties.at(name); + } + + OPENVINO_ASSERT(internal_properties.find(name) != internal_properties.end(), "[GPU] Can't get internal property with name ", name); + return internal_properties.at(name); +} + +void ExecutionConfig::apply_performance_hints(const cldnn::device_info& 
+void ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) {
+    if (is_set_by_user(ov::hint::performance_mode)) {
+        const auto mode = get_property(ov::hint::performance_mode);
+        if (!is_set_by_user(ov::num_streams)) {
+            if (mode == ov::hint::PerformanceMode::LATENCY) {
+                set_property(ov::num_streams(1));
+            } else if (mode == ov::hint::PerformanceMode::THROUGHPUT) {
+                set_property(ov::num_streams(ov::streams::AUTO));
+            }
+        }
+    }
+
+    if (get_property(ov::num_streams) == ov::streams::AUTO) {
+        int32_t n_streams = std::max(info.num_ccs, 2);
+        set_property(ov::num_streams(n_streams));
+    }
+}
+
+void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) {
+    if (is_set_by_user(ov::hint::model_priority)) {
+        const auto priority = get_property(ov::hint::model_priority);
+        if (!is_set_by_user(ov::intel_gpu::hint::queue_priority)) {
+            set_property(ov::intel_gpu::hint::queue_priority(priority));
+        }
+    }
+}
+
+void ExecutionConfig::apply_debug_options(const cldnn::device_info& info) {
+    GPU_DEBUG_GET_INSTANCE(debug_config);
+    GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) {
+        set_property(ov::intel_gpu::dump_graphs(debug_config->dump_graphs));
+    }
+
+    GPU_DEBUG_IF(debug_config->serialize_compile == 1) {
+        set_property(ov::compilation_num_threads(1));
+    }
+}
+
+void ExecutionConfig::apply_hints(const cldnn::device_info& info) {
+    apply_performance_hints(info);
+    apply_priority_hints(info);
+    apply_debug_options(info);
+}
+
+void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) {
+    // Copy user properties into the internal properties before applying hints to ensure that
+    // a property set by a hint won't be overridden by a raw value from the user config.
+    // E.g. num_streams=AUTO && hint=THROUGHPUT
+    // If we applied hints first and then copied all values from the user config into the internal one,
+    // we would get num_streams=AUTO in the final config while some integer number is expected.
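+    // Illustrative sketch of the ordering above (hypothetical caller code; `device_info` is assumed
+    // to be a valid cldnn::device_info instance, and only the ExecutionConfig API defined in this file is used):
+    //
+    //     ExecutionConfig cfg;
+    //     cfg.set_user_property({ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)});
+    //     cfg.apply_user_properties(device_info);
+    //     // The user values are copied first, then apply_hints() resolves num_streams=AUTO into a
+    //     // concrete stream count, so cfg.get_property(ov::num_streams) yields an integer rather
+    //     // than the AUTO sentinel.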
+ for (auto& kv : user_properties) { + internal_properties[kv.first] = kv.second; + } + apply_hints(info); + if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { + set_property(ov::intel_gpu::enable_lp_transformations(info.supports_imad || info.supports_immad)); + } + user_properties.clear(); +} + +std::string ExecutionConfig::to_string() const { + std::stringstream s; + s << "internal properties:\n"; + for (auto& kv : internal_properties) { + s << "\t" << kv.first << ": " << kv.second.as() << std::endl; + } + s << "user properties:\n"; + for (auto& kv : user_properties) { + s << "\t" << kv.first << ": " << kv.second.as() << std::endl; + } + return s.str(); +} + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp b/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp index 858160f2b67..27bff7a5b23 100644 --- a/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp +++ b/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp @@ -58,7 +58,7 @@ namespace cldnn { std::mutex kernels_cache::_mutex; std::string kernels_cache::get_cache_path() const { - auto path = _engine.configuration().kernels_cache_path; + auto path = _config.get_property(ov::cache_dir); if (path.empty()) { return {}; } @@ -76,7 +76,7 @@ bool kernels_cache::is_cache_enabled() const { } } - return !_engine.configuration().kernels_cache_path.empty(); + return !_config.get_property(ov::cache_dir).empty(); } size_t kernels_cache::get_max_kernels_per_batch() const { @@ -156,8 +156,16 @@ void kernels_cache::get_program_source(const kernels_code& kernels_source_code, } } -kernels_cache::kernels_cache(engine& engine, uint32_t prog_id, const std::vector& batch_header_str) - : _engine(engine), _prog_id(prog_id), batch_header_str(std::move(batch_header_str)) { } +kernels_cache::kernels_cache(engine& engine, + const ExecutionConfig& config, + uint32_t prog_id, + InferenceEngine::CPUStreamsExecutor::Ptr task_executor, + const std::vector& batch_header_str) + : _engine(engine) + , _task_executor(task_executor) + , _config(config) + , _prog_id(prog_id) + , batch_header_str(std::move(batch_header_str)) { } kernel_id kernels_cache::set_kernel_source( const std::shared_ptr& kernel_string, @@ -188,8 +196,8 @@ void kernels_cache::build_batch(const engine& build_engine, const batch_program& auto& cl_build_engine = dynamic_cast(build_engine); - bool dump_sources = !_engine.configuration().sources_dumps_dir.empty() || batch.dump_custom_program; - std::string dump_sources_dir = _engine.configuration().sources_dumps_dir; + bool dump_sources = batch.dump_custom_program; + std::string dump_sources_dir = ""; GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(!debug_config->dump_sources.empty()) { dump_sources = true; @@ -371,35 +379,36 @@ void kernels_cache::build_all() { if (!_pending_compilation) return; - std::unique_ptr _build_engine = nullptr; - if (_engine.type() == engine_types::ocl) { - _build_engine = std::unique_ptr(new ocl::ocl_engine(_engine.get_device(), runtime_types::ocl, - _engine.configuration(), _engine.get_task_executor())); - } + ocl::ocl_engine& _build_engine = downcast(_engine); std::vector batches; { std::lock_guard lock(_mutex); get_program_source(_kernels_code, &batches); } - auto _task_executor = _engine.get_task_executor(); - std::exception_ptr exception; - std::vector tasks; - for (size_t idx = 0; idx < batches.size(); idx++) { - auto& batch = batches[idx]; - tasks.push_back([this, &_build_engine, &batch, &exception] { - try { - build_batch(*_build_engine, batch); 
- } catch(...) { - exception = std::current_exception(); - } - }); - } - _task_executor->runAndWait(tasks); - tasks.clear(); + if (_task_executor) { + std::exception_ptr exception; + std::vector tasks; + for (size_t idx = 0; idx < batches.size(); idx++) { + auto& batch = batches[idx]; + tasks.push_back([this, &_build_engine, &batch, &exception] { + try { + build_batch(_build_engine, batch); + } catch(...) { + exception = std::current_exception(); + } + }); + } + _task_executor->runAndWait(tasks); + tasks.clear(); - if (exception) { - std::rethrow_exception(exception); + if (exception) { + std::rethrow_exception(exception); + } + } else { + for (size_t idx = 0; idx < batches.size(); idx++) { + build_batch(_build_engine, batches[idx]); + } } { @@ -458,8 +467,7 @@ void kernels_cache::compile() { std::unique_ptr _build_engine = nullptr; if (_engine.type() == engine_types::ocl) { - _build_engine = std::unique_ptr(new ocl::ocl_engine(_engine.get_device(), runtime_types::ocl, - _engine.configuration(), _engine.get_task_executor())); + _build_engine = std::unique_ptr(new ocl::ocl_engine(_engine.get_device(), runtime_types::ocl)); } // create batches @@ -497,8 +505,7 @@ void kernels_cache::save(BinaryOutputBuffer& ob) const { } ob << entry_point_to_id; - std::unique_ptr build_engine = - cldnn::make_unique(_engine.get_device(), runtime_types::ocl, _engine.configuration(), _engine.get_task_executor()); + std::unique_ptr build_engine = cldnn::make_unique(_engine.get_device(), runtime_types::ocl); std::vector> precompiled_kernels; @@ -540,7 +547,7 @@ void kernels_cache::load(BinaryInputBuffer& ib) { OPENVINO_ASSERT(_engine.type() == engine_types::ocl, "[GPU] Not supported engine type"); std::unique_ptr build_engine = - cldnn::make_unique(_engine.get_device(), runtime_types::ocl, _engine.configuration(), _engine.get_task_executor()); + cldnn::make_unique(_engine.get_device(), runtime_types::ocl); std::map entry_point_to_id; std::vector> precompiled_kernels; diff --git a/src/plugins/intel_gpu/src/runtime/kernels_cache.hpp b/src/plugins/intel_gpu/src/runtime/kernels_cache.hpp index b8756f270fc..0e429b9229a 100644 --- a/src/plugins/intel_gpu/src/runtime/kernels_cache.hpp +++ b/src/plugins/intel_gpu/src/runtime/kernels_cache.hpp @@ -7,6 +7,7 @@ #include "intel_gpu/graph/serialization/binary_buffer.hpp" #include "intel_gpu/runtime/engine.hpp" #include "intel_gpu/runtime/kernel.hpp" +#include "intel_gpu/runtime/execution_config.hpp" #include #include @@ -76,6 +77,8 @@ public: private: static std::mutex _mutex; engine& _engine; + InferenceEngine::CPUStreamsExecutor::Ptr _task_executor; + ExecutionConfig _config; uint32_t _prog_id = 0; kernels_code _kernels_code; size_t _kernel_idx = 0; @@ -91,7 +94,11 @@ private: size_t get_max_kernels_per_batch() const; public: - explicit kernels_cache(engine& engine, uint32_t prog_id, const std::vector& batch_header_str = {}); + explicit kernels_cache(engine& engine, + const ExecutionConfig& config, + uint32_t prog_id, + InferenceEngine::CPUStreamsExecutor::Ptr task_executor = nullptr, + const std::vector& batch_header_str = {}); kernel_id set_kernel_source(const std::shared_ptr& kernel_string, bool dump_custom_program); kernel::ptr get_kernel(kernel_id id) const; diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_command_queues_builder.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_command_queues_builder.cpp index a501e935e7c..c294178fb46 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_command_queues_builder.cpp +++ 
b/src/plugins/intel_gpu/src/runtime/ocl/ocl_command_queues_builder.cpp @@ -14,20 +14,20 @@ command_queues_builder::command_queues_builder() : _profiling(false), _out_of_order(false), _supports_queue_families(false), - _priority_mode(priority_mode_types::disabled), - _throttle_mode(throttle_mode_types::disabled) {} + _priority_mode(), + _throttle_mode() {} #if CL_TARGET_OPENCL_VERSION >= 200 std::vector command_queues_builder::get_properties(const cl::Device& device, uint16_t stream_id) { std::vector properties; - if (_priority_mode != priority_mode_types::disabled) { + if (_priority_mode.has_value()) { unsigned cl_queue_priority_value = CL_QUEUE_PRIORITY_MED_KHR; - switch (_priority_mode) { - case priority_mode_types::high: + switch (_priority_mode.value()) { + case ov::hint::Priority::HIGH: cl_queue_priority_value = CL_QUEUE_PRIORITY_HIGH_KHR; break; - case priority_mode_types::low: + case ov::hint::Priority::LOW: cl_queue_priority_value = CL_QUEUE_PRIORITY_LOW_KHR; break; default: @@ -37,13 +37,13 @@ std::vector command_queues_builder::get_properties(const cl properties.insert(properties.end(), {CL_QUEUE_PRIORITY_KHR, cl_queue_priority_value}); } - if (_throttle_mode != throttle_mode_types::disabled) { + if (_throttle_mode.has_value()) { unsigned cl_queue_throttle_value = CL_QUEUE_THROTTLE_MED_KHR; - switch (_throttle_mode) { - case throttle_mode_types::high: + switch (_throttle_mode.value()) { + case ov::intel_gpu::hint::ThrottleLevel::HIGH: cl_queue_throttle_value = CL_QUEUE_THROTTLE_HIGH_KHR; break; - case throttle_mode_types::low: + case ov::intel_gpu::hint::ThrottleLevel::LOW: cl_queue_throttle_value = CL_QUEUE_THROTTLE_LOW_KHR; break; default: @@ -107,27 +107,19 @@ ocl_queue_type command_queues_builder::build(const cl::Context& context, const c #else queue = clCreateCommandQueue(context.get(), device.get(), properties, &error_code); #endif - if (error_code != CL_SUCCESS) { - CLDNN_ERROR_MESSAGE("Command queues builders", - "clCreateCommandQueueWithPropertiesINTEL error " + std::to_string(error_code)); - } - + OPENVINO_ASSERT(error_code == CL_SUCCESS, "[GPU] Command queues builder returned ", error_code, " error code"); return queue; } -void command_queues_builder::set_priority_mode(priority_mode_types priority, bool extension_support) { +void command_queues_builder::set_priority_mode(ov::hint::Priority priority, bool extension_support) { if (extension_support) { _priority_mode = priority; - } else { - _priority_mode = priority_mode_types::disabled; } } -void command_queues_builder::set_throttle_mode(throttle_mode_types throttle, bool extension_support) { +void command_queues_builder::set_throttle_mode(ov::intel_gpu::hint::ThrottleLevel throttle, bool extension_support) { if (extension_support) { _throttle_mode = throttle; - } else { - _throttle_mode = throttle_mode_types::disabled; } } diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_command_queues_builder.hpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_command_queues_builder.hpp index 30e74761b5c..ee850f911dd 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_command_queues_builder.hpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_command_queues_builder.hpp @@ -6,6 +6,7 @@ #include "ocl_common.hpp" #include "intel_gpu/runtime/engine.hpp" +#include "intel_gpu/runtime/optionals.hpp" namespace cldnn { namespace ocl { @@ -14,8 +15,8 @@ class command_queues_builder { public: command_queues_builder(); ocl_queue_type build(const cl::Context& context, const cl::Device& device); - void set_throttle_mode(throttle_mode_types 
throttle, bool extension_support); - void set_priority_mode(priority_mode_types priority, bool extension_support); + void set_throttle_mode(ov::intel_gpu::hint::ThrottleLevel throttle, bool extension_support); + void set_priority_mode(ov::hint::Priority priority, bool extension_support); void set_profiling(bool flag) { _profiling = flag; } void set_out_of_order(bool flag) { _out_of_order = flag; } void set_supports_queue_families(bool extension_support); @@ -24,8 +25,8 @@ private: bool _profiling; bool _out_of_order; bool _supports_queue_families; - priority_mode_types _priority_mode; - throttle_mode_types _throttle_mode; + optional_value _priority_mode; + optional_value _throttle_mode; #if CL_TARGET_OPENCL_VERSION >= 200 std::vector get_properties(const cl::Device& device, uint16_t stream_id = 0); #else diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp index 4d5485e07bb..79fd76ef4f8 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp @@ -288,7 +288,7 @@ bool ocl_device::is_same(const device::ptr other) { if (!casted) return false; - return _context == casted->get_context() && _device == casted->get_device() && _platform == casted->get_platform(); + return _device == casted->get_device() && _platform == casted->get_platform(); } } // namespace ocl diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp index bf3ea878018..543f7103461 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp @@ -41,9 +41,8 @@ namespace ocl { ocl_error::ocl_error(cl::Error const& err) : ov::Exception("[GPU] " + std::string(err.what()) + std::string(", error code: ") + std::to_string(err.err())) {} -ocl_engine::ocl_engine(const device::ptr dev, runtime_types runtime_type, - const engine_configuration& conf, const InferenceEngine::ITaskExecutor::Ptr task_executor) - : engine(dev, conf, task_executor) { +ocl_engine::ocl_engine(const device::ptr dev, runtime_types runtime_type) + : engine(dev) { OPENVINO_ASSERT(runtime_type == runtime_types::ocl, "[GPU] Invalid runtime type specified for OCL engine. 
Only OCL runtime is supported"); auto casted = dynamic_cast(dev.get()); @@ -52,12 +51,11 @@ ocl_engine::ocl_engine(const device::ptr dev, runtime_types runtime_type, casted->get_device().getInfo(CL_DEVICE_EXTENSIONS, &_extensions); _usm_helper.reset(new cl::UsmHelper(get_cl_context(), get_cl_device(), use_unified_shared_memory())); - - _program_stream.reset(new ocl_stream(*this)); + _service_stream.reset(new ocl_stream(*this, ExecutionConfig())); } #ifdef ENABLE_ONEDNN_FOR_GPU -dnnl::engine& ocl_engine::get_onednn_engine() const { +void ocl_engine::create_onednn_engine(const ExecutionConfig& config) { const std::lock_guard lock(onednn_mutex); OPENVINO_ASSERT(_device->get_info().vendor_id == INTEL_VENDOR_ID, "[GPU] OneDNN engine can be used for Intel GPUs only"); if (!_onednn_engine) { @@ -65,12 +63,12 @@ dnnl::engine& ocl_engine::get_onednn_engine() const { if (!casted) throw ov::Exception("[GPU] Invalid device type stored in ocl_engine"); - auto config = this->configuration(); - if (config.kernels_cache_path.empty()) { + std::string cache_dir = config.get_property(ov::cache_dir); + if (cache_dir.empty()) { _onednn_engine = std::make_shared(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get())); } else { // Use cached blob - auto path = config.kernels_cache_path; + auto path = cache_dir; if (path.back() != '/' && path.back() != '\\') { path += "/"; } @@ -79,7 +77,7 @@ dnnl::engine& ocl_engine::get_onednn_engine() const { if (blob_id.empty()) { // Create engine without cache_blob _onednn_engine = std::make_shared(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get())); - return *_onednn_engine; + return; } std::string id_str(blob_id.begin(), blob_id.end()); @@ -98,7 +96,10 @@ dnnl::engine& ocl_engine::get_onednn_engine() const { } } } +} +dnnl::engine& ocl_engine::get_onednn_engine() const { + OPENVINO_ASSERT(_onednn_engine, "[GPU] Can't get onednn engine handle as it was not initialized. 
Please check that create_onednn_engine() was called"); return *_onednn_engine; } #endif @@ -154,7 +155,7 @@ memory::ptr ocl_engine::allocate_memory(const layout& layout, allocation_type ty } if (reset || res->is_memory_reset_needed(layout)) { - res->fill(get_program_stream()); + res->fill(get_service_stream()); } return res; @@ -266,26 +267,24 @@ bool ocl_engine::extension_supported(std::string extension) const { return _extensions.find(extension) != std::string::npos; } -stream::ptr ocl_engine::create_stream() const { - return std::make_shared(*this); +stream::ptr ocl_engine::create_stream(const ExecutionConfig& config) const { + return std::make_shared(*this, config); } -stream::ptr ocl_engine::create_stream(void* handle) const { - return std::make_shared(*this, handle); +stream::ptr ocl_engine::create_stream(const ExecutionConfig& config, void* handle) const { + return std::make_shared(*this, config, handle); } -stream& ocl_engine::get_program_stream() const { - return *_program_stream; +stream& ocl_engine::get_service_stream() const { + return *_service_stream; } -std::shared_ptr ocl_engine::create(const device::ptr device, runtime_types runtime_type, - const engine_configuration& configuration, const InferenceEngine::ITaskExecutor::Ptr task_executor) { - return std::make_shared(device, runtime_type, configuration, task_executor); +std::shared_ptr ocl_engine::create(const device::ptr device, runtime_types runtime_type) { + return std::make_shared(device, runtime_type); } -std::shared_ptr create_ocl_engine(const device::ptr device, runtime_types runtime_type, - const engine_configuration& configuration, const InferenceEngine::ITaskExecutor::Ptr task_executor) { - return ocl_engine::create(device, runtime_type, configuration, task_executor); +std::shared_ptr create_ocl_engine(const device::ptr device, runtime_types runtime_type) { + return ocl_engine::create(device, runtime_type); } } // namespace ocl diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.hpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.hpp index 2999c1be157..20fa06f3501 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.hpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.hpp @@ -20,7 +20,7 @@ namespace ocl { class ocl_engine : public engine { public: - ocl_engine(const device::ptr dev, runtime_types runtime_type, const engine_configuration& conf, const InferenceEngine::ITaskExecutor::Ptr task_executor); + ocl_engine(const device::ptr dev, runtime_types runtime_type); engine_types type() const override { return engine_types::ocl; }; runtime_types runtime_type() const override { return runtime_types::ocl; }; @@ -40,27 +40,26 @@ public: bool extension_supported(std::string extension) const; - stream_ptr create_stream() const override; - stream_ptr create_stream(void *handle) const override; - stream& get_program_stream() const override; + stream_ptr create_stream(const ExecutionConfig& config) const override; + stream_ptr create_stream(const ExecutionConfig& config, void *handle) const override; + stream& get_service_stream() const override; #ifdef ENABLE_ONEDNN_FOR_GPU + void create_onednn_engine(const ExecutionConfig& config) override; // Returns onednn engine object which shares device and context with current engine - // If onednn engine has not been created yet, it creates on-demand. 
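+    // Illustrative usage of the new two-step initialization (hypothetical caller code; the `engine`
+    // pointer and `cfg` object are assumptions, not part of this interface):
+    //     ExecutionConfig cfg;
+    //     engine->create_onednn_engine(cfg);                    // creates (or loads a cached) dnnl::engine
+    //     dnnl::engine& onednn = engine->get_onednn_engine();   // asserts if create_onednn_engine() was never called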
dnnl::engine& get_onednn_engine() const override; #endif - static std::shared_ptr create(const device::ptr device, runtime_types runtime_type, - const engine_configuration& configuration, const InferenceEngine::ITaskExecutor::Ptr task_executor); + static std::shared_ptr create(const device::ptr device, runtime_types runtime_type); private: std::string _extensions; - std::unique_ptr _program_stream; + std::unique_ptr _service_stream; std::unique_ptr _usm_helper; #ifdef ENABLE_ONEDNN_FOR_GPU - mutable std::mutex onednn_mutex; - mutable std::shared_ptr _onednn_engine; + std::mutex onednn_mutex; + std::shared_ptr _onednn_engine; #endif }; diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine_factory.hpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine_factory.hpp index 449c96e8305..2571cad9a9b 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine_factory.hpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine_factory.hpp @@ -13,8 +13,7 @@ namespace ocl { // Factory for ocl_engine creation. It's moved outside of ocl_engine class to avoid possible CL includes conflict // between different engines in engine.cpp file -std::shared_ptr create_ocl_engine(const device::ptr device, runtime_types runtime_type, - const engine_configuration& configuration, InferenceEngine::ITaskExecutor::Ptr task_executor); +std::shared_ptr create_ocl_engine(const device::ptr device, runtime_types runtime_type); } // namespace ocl } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp index ff1b4db7f3a..d30b738e3b3 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp @@ -181,76 +181,62 @@ void set_arguments_impl(ocl_kernel_type& kernel, } } -sync_methods get_expected_sync_method(const engine_configuration &config) { - return config.enable_profiling ? sync_methods::events : config.queue_type == queue_types::out_of_order ? sync_methods::barriers - : sync_methods::none; +sync_methods get_expected_sync_method(const ExecutionConfig& config) { + auto profiling = config.get_property(ov::enable_profiling); + auto queue_type = config.get_property(ov::intel_gpu::queue_type); + return profiling ? sync_methods::events : queue_type == QueueTypes::out_of_order ? 
sync_methods::barriers + : sync_methods::none; } } // namespace -ocl_stream::ocl_stream(const ocl_engine &engine) - : stream(engine.configuration().queue_type) +ocl_stream::ocl_stream(const ocl_engine &engine, const ExecutionConfig& config) + : stream(config.get_property(ov::intel_gpu::queue_type)) , _engine(engine) - , sync_method(get_expected_sync_method(engine.configuration())) { + , sync_method(get_expected_sync_method(config)) { auto context = engine.get_cl_context(); auto device = engine.get_cl_device(); - auto config = engine.configuration(); ocl::command_queues_builder queue_builder; - queue_builder.set_profiling(config.enable_profiling); - queue_builder.set_out_of_order((config.queue_type == queue_types::out_of_order)); + queue_builder.set_profiling(config.get_property(ov::enable_profiling)); + queue_builder.set_out_of_order(queue_type == QueueTypes::out_of_order); - if (sync_method == sync_methods::none && config.queue_type == queue_types::out_of_order) { + if (sync_method == sync_methods::none && queue_type == QueueTypes::out_of_order) { throw std::runtime_error("[CLDNN] Unexpected sync method (none) is specified for out_of_order queue"); } bool priorty_extensions = engine.extension_supported("cl_khr_priority_hints") && engine.extension_supported("cl_khr_create_command_queue"); - queue_builder.set_priority_mode(config.priority_mode, priorty_extensions); + queue_builder.set_priority_mode(config.get_property(ov::intel_gpu::hint::queue_priority), priorty_extensions); bool throttle_extensions = engine.extension_supported("cl_khr_throttle_hints") && engine.extension_supported("cl_khr_create_command_queue"); - queue_builder.set_throttle_mode(config.throttle_mode, throttle_extensions); + queue_builder.set_throttle_mode(config.get_property(ov::intel_gpu::hint::queue_throttle), throttle_extensions); bool queue_families_extension = engine.get_device_info().supports_queue_families; queue_builder.set_supports_queue_families(queue_families_extension); _command_queue = queue_builder.build(context, device); - -#ifdef ENABLE_ONEDNN_FOR_GPU - if (config.queue_type == queue_types::in_order && engine.get_device_info().vendor_id == INTEL_VENDOR_ID) { - auto onednn_engine = engine.get_onednn_engine(); - _onednn_stream = std::make_shared(dnnl::ocl_interop::make_stream(engine.get_onednn_engine(), _command_queue.get())); - } -#endif } -ocl_stream::ocl_stream(const ocl_engine &engine, void *handle) - : stream(engine.configuration().queue_type) +ocl_stream::ocl_stream(const ocl_engine &engine, const ExecutionConfig& config, void *handle) + : stream(ocl_stream::detect_queue_type(handle)) , _engine(engine) - , sync_method(get_expected_sync_method(engine.configuration())) { + , sync_method(get_expected_sync_method(config)) { auto casted_handle = static_cast(handle); _command_queue = ocl_queue_type(casted_handle, true); - - if (ocl_stream::detect_queue_type(handle) != engine.configuration().queue_type) - throw std::runtime_error("Inconsistent engine config and external user queue are passed to ocl_stream"); - -#ifdef ENABLE_ONEDNN_FOR_GPU - auto config = engine.configuration(); - if (config.queue_type == queue_types::in_order) { - auto onednn_engine = engine.get_onednn_engine(); - _onednn_stream = std::make_shared(dnnl::ocl_interop::make_stream(engine.get_onednn_engine(), _command_queue.get())); - } -#endif } #ifdef ENABLE_ONEDNN_FOR_GPU -dnnl::stream& ocl_stream::get_onednn_stream() const { - if (!_onednn_stream) - throw std::runtime_error("[GPU] onednn stream is nullptr"); +dnnl::stream& 
ocl_stream::get_onednn_stream() { + OPENVINO_ASSERT(queue_type == QueueTypes::in_order, "[GPU] Can't create onednn stream handle as onednn doesn't support out-of-order queue"); + OPENVINO_ASSERT(_engine.get_device_info().vendor_id == INTEL_VENDOR_ID, "[GPU] Can't create onednn stream handle as for non-Intel devices"); + if (!_onednn_stream) { + _onednn_stream = std::make_shared(dnnl::ocl_interop::make_stream(_engine.get_onednn_engine(), _command_queue.get())); + } return *_onednn_stream; } #endif -queue_types ocl_stream::detect_queue_type(void *queue_handle) { +QueueTypes ocl_stream::detect_queue_type(void *queue_handle) { cl_command_queue queue = static_cast(queue_handle); cl_command_queue_properties properties; auto status = clGetCommandQueueInfo(queue, CL_QUEUE_PROPERTIES, sizeof(cl_command_queue_properties), &properties, nullptr); @@ -258,7 +244,7 @@ queue_types ocl_stream::detect_queue_type(void *queue_handle) { throw std::runtime_error("Can't get queue properties for user handle\n"); } - return (properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) ? queue_types::out_of_order : queue_types::in_order; + return (properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) ? QueueTypes::out_of_order : QueueTypes::in_order; } void ocl_stream::set_arguments(kernel& kernel, const kernel_arguments_desc& args_desc, const kernel_arguments_data& args) { diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.hpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.hpp index 6cab4415763..61a56c2fbf8 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.hpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.hpp @@ -49,10 +49,10 @@ class ocl_stream : public stream { public: const ocl_queue_type& get_cl_queue() const { return _command_queue; } - explicit ocl_stream(const ocl_engine& engine); - ocl_stream(const ocl_engine &engine, void *handle); + explicit ocl_stream(const ocl_engine& engine, const ExecutionConfig& config); + ocl_stream(const ocl_engine &engine, const ExecutionConfig& config, void *handle); ocl_stream(ocl_stream&& other) - : stream(other._engine.configuration().queue_type) + : stream(other.queue_type) , _engine(other._engine) , _command_queue(other._command_queue) , _queue_counter(other._queue_counter.load()) @@ -80,10 +80,10 @@ public: const cl::UsmHelper& get_usm_helper() const { return _engine.get_usm_helper(); } - static queue_types detect_queue_type(void* queue_handle); + static QueueTypes detect_queue_type(void* queue_handle); #ifdef ENABLE_ONEDNN_FOR_GPU - dnnl::stream& get_onednn_stream() const override; + dnnl::stream& get_onednn_stream() override; #endif private: diff --git a/src/plugins/intel_gpu/src/runtime/stream.cpp b/src/plugins/intel_gpu/src/runtime/stream.cpp index cb6e4f8543c..b320a313fb2 100644 --- a/src/plugins/intel_gpu/src/runtime/stream.cpp +++ b/src/plugins/intel_gpu/src/runtime/stream.cpp @@ -10,7 +10,7 @@ namespace cldnn { -queue_types stream::detect_queue_type(engine_types engine_type, void* queue_handle) { +QueueTypes stream::detect_queue_type(engine_types engine_type, void* queue_handle) { switch (engine_type) { case engine_types::ocl: return ocl::ocl_stream::detect_queue_type(queue_handle); default: throw std::runtime_error("Invalid engine type"); diff --git a/src/plugins/intel_gpu/tests/fusions/activation_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/activation_fusion_test.cpp index 13fa74c69e0..2d45cd8fd74 100644 --- a/src/plugins/intel_gpu/tests/fusions/activation_fusion_test.cpp +++ 
b/src/plugins/intel_gpu/tests/fusions/activation_fusion_test.cpp @@ -32,12 +32,12 @@ public: void execute(activation_test_params& p) { auto input_prim = get_mem(get_input_layout(p)); - build_options options; - implementation_desc activation_impl = { p.input_format, p.kernel_name }; - options.set_option(build_option::optimize_data(true)); - options.set_option(build_option::force_implementations({ { "act", activation_impl } })); - network network_fused(this->engine, this->topology_fused, options); - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); + ExecutionConfig cfg; + ov::intel_gpu::ImplementationDesc activation_impl = { p.input_format, p.kernel_name }; + cfg.set_property(ov::intel_gpu::optimize_data(true)); + cfg.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "act", activation_impl } })); + network network_fused(this->engine, this->topology_fused, cfg); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); diff --git a/src/plugins/intel_gpu/tests/fusions/batch_to_space_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/batch_to_space_fusion_test.cpp index fddf3e2bfa3..1bb20cd0983 100644 --- a/src/plugins/intel_gpu/tests/fusions/batch_to_space_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/batch_to_space_fusion_test.cpp @@ -36,8 +36,8 @@ public: void execute(batch_to_space_test_params& p) { auto input_prim = get_mem(get_input_layout(p)); - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); diff --git a/src/plugins/intel_gpu/tests/fusions/binary_convolution_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/binary_convolution_fusion_test.cpp index 21231c13df3..7ae2842f6d2 100644 --- a/src/plugins/intel_gpu/tests/fusions/binary_convolution_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/binary_convolution_fusion_test.cpp @@ -40,8 +40,8 @@ class BinaryConvolutionFusingTest : public BaseFusingTestengine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); diff --git a/src/plugins/intel_gpu/tests/fusions/concatenate_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/concatenate_fusion_test.cpp index 96580fd1449..3f3b7fe1e82 100644 --- a/src/plugins/intel_gpu/tests/fusions/concatenate_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/concatenate_fusion_test.cpp @@ -38,20 +38,18 @@ public: auto input0_prim = get_mem(get_input_layout(p)); auto input1_prim = get_mem(get_input_layout(p)); - build_options onednn_options; - build_options cldnn_options; - - onednn_options.set_option(build_option::optimize_data(true)); - cldnn_options.set_option(build_option::optimize_data(true)); - - implementation_desc onednn_impl = { p.input_format, "", impl_types::onednn }; - implementation_desc 
cldnn_impl = { p.input_format, "", impl_types::ocl }; - onednn_options.set_option(build_option::force_implementations({ { "concat", onednn_impl } })); - cldnn_options.set_option(build_option::force_implementations({ { "concat", cldnn_impl } })); + ov::intel_gpu::ImplementationDesc onednn_impl = { p.input_format, "", impl_types::onednn }; + ov::intel_gpu::ImplementationDesc cldnn_impl = { p.input_format, "", impl_types::ocl }; // for onednn fusing test, topology_non_fused means cldnn, topology_fused is onednn - network network_fused_cldnn(this->engine, this->topology_non_fused, cldnn_options); - network network_fused_onednn(this->engine, this->topology_fused, onednn_options); + ExecutionConfig cldnn_cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), + ov::intel_gpu::optimize_data(true), + ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "concat", cldnn_impl } })}; + ExecutionConfig onednn_cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), + ov::intel_gpu::optimize_data(true), + ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "concat", onednn_impl } })}; + network network_fused_cldnn(this->engine, this->topology_non_fused, cldnn_cfg); + network network_fused_onednn(this->engine, this->topology_fused, onednn_cfg); network_fused_cldnn.set_input_data("input0", input0_prim); network_fused_cldnn.set_input_data("input1", input1_prim); diff --git a/src/plugins/intel_gpu/tests/fusions/convolution_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/convolution_fusion_test.cpp index 578d7077b74..d1fe3145b6e 100644 --- a/src/plugins/intel_gpu/tests/fusions/convolution_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/convolution_fusion_test.cpp @@ -90,8 +90,8 @@ public: } else { input_prim = get_mem(get_input_layout(p), min, max); } - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); @@ -127,8 +127,8 @@ class ConvReorderFusingTest : public BaseFusingTest { public: void execute(convolution_test_params& p, std::map> expected_fused_primitives_ids = {}) { auto input_prim = get_mem(get_input_layout(p)); - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); @@ -155,8 +155,8 @@ public: p.expected_fused_primitives = p.expected_fused_primitives_onednn; auto input_prim = get_mem(get_input_layout(p)); - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); @@ -189,13 +189,13 @@ class ConvFusingForceKernelTest : public BaseFusingTest public: void execute(bc_force_kernel_params& p) { auto 
input_prim = get_mem(get_input_layout(p)); - build_options options; - options.set_option(build_option::optimize_data(true)); - implementation_desc conv_impl = { p.input_format, p.kernel_name }; - options.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + ov::intel_gpu::ImplementationDesc conv_impl = { p.input_format, p.kernel_name }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, options); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, config); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); @@ -234,8 +234,7 @@ public: auto input_prim = p.data_type == data_types::u8 ? get_mem(get_input_layout(p), 0, 10) : get_mem(get_input_layout(p)); - auto impl_forcing_bo = bo_fused.get(); - const auto& impl_forcing = impl_forcing_bo->forcing; + auto impl_forcing = cfg_fused.get_property(ov::intel_gpu::force_implementations); auto forcing_format = p.input_format; for (auto& forcing : impl_forcing) { @@ -244,11 +243,12 @@ public: } } - implementation_desc conv_impl = { forcing_format, "", impl_types::onednn }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { forcing_format, "", impl_types::onednn }; - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + auto cfg = cfg_fused; + cfg.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); @@ -598,9 +598,9 @@ class conv_fp32_add_per_element_planar_const : public ConvFusingTest {}; TEST_P(conv_fp32_add_per_element_planar_const, basic) { auto p = GetParam(); - implementation_desc conv_impl = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16" }; - implementation_desc permute_impl = { format::b_fs_yx_fsv16, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl }, + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16" }; + ov::intel_gpu::ImplementationDesc permute_impl = { format::b_fs_yx_fsv16, "" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl }, { "permute", permute_impl } })); auto out_layout = get_output_layout(p); @@ -787,8 +787,8 @@ TEST_P(conv_fp32_prelu_eltwise, vector_ops) { reorder("reorder_bfyx", input_info("eltwise"), p.default_format, data_types::f32) ); - implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); tolerance = 
default_tolerance(p.data_type); execute(p); @@ -808,8 +808,8 @@ TEST_P(conv_fp32_prelu_eltwise, vector_ops_slope_2) { reorder("reorder_bfyx", input_info("eltwise"), p.default_format, data_types::f32) ); - implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); tolerance = default_tolerance(p.data_type); execute(p); @@ -830,8 +830,8 @@ TEST_P(conv_fp32_prelu_eltwise, vector_ops_mixed_types) { reorder("reorder_bfyx", input_info("eltwise"), p.default_format, data_types::f32) ); - implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); tolerance = default_tolerance(p.data_type); execute(p); @@ -852,8 +852,8 @@ TEST_P(conv_fp32_prelu_eltwise, vector_ops_mixed_types_slope_2) { reorder("reorder_bfyx", input_info("eltwise"), p.default_format, data_types::f32) ); - implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); tolerance = default_tolerance(p.data_type); execute(p); @@ -888,8 +888,8 @@ TEST_P(conv_fp32_multi_eltwise_2, basic) { eltwise("eltwise2", input_info("eltwise1"), input_info("conv_prim"), eltwise_mode::prod), reorder("reorder_bfyx", input_info("eltwise2"), p.default_format, data_types::f32) ); - implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); tolerance = default_tolerance(p.default_type); execute(p); @@ -924,8 +924,8 @@ TEST_P(conv_fp32_multi_eltwise_2_clamp, basic) { eltwise("eltwise2", input_info("activation"), input_info("conv_prim"), eltwise_mode::prod), reorder("reorder_bfyx", input_info("eltwise2"), p.default_format, data_types::f32) ); - implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); tolerance = default_tolerance(p.default_type); execute(p); @@ -964,8 +964,8 @@ TEST_P(conv_fp32_multi_eltwise_4_clamp, basic) { eltwise("eltwise4_add", input_info("eltwise3_div"), input_info("eltwise4_data"), eltwise_mode::sum), reorder("reorder_bfyx", input_info("eltwise4_add"), p.default_format, data_types::f32) ); - implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + 
ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); tolerance = default_tolerance(p.default_type); execute(p); @@ -1003,8 +1003,8 @@ TEST_P(conv_fp32_eltwise_fusing_extend_ops, pattern01_simple_sub) { concatenation("concat", { input_info("eltwise4_sum"), input_info("eltwise4_sum") }, 1), reorder("reorder_bfyx", input_info("concat"), p.default_format, data_types::f32) ); - implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); tolerance = default_tolerance(p.default_type); execute(p); @@ -1031,8 +1031,8 @@ TEST_P(conv_fp32_eltwise_fusing_extend_ops, pattern02_sub_scale) { concatenation("concat", { input_info("scale"), input_info("scale") }, 1), reorder("reorder_bfyx", input_info("concat"), p.default_format, data_types::f32) ); - implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); tolerance = default_tolerance(p.default_type); execute(p); @@ -1060,8 +1060,8 @@ TEST_P(conv_fp32_eltwise_fusing_extend_ops, pattern03_sub_div) { concatenation("concat", { input_info("eltwise4_sum"), input_info("eltwise4_sum") }, 1), reorder("reorder_bfyx", input_info("concat"), p.default_format, data_types::f32) ); - implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); tolerance = default_tolerance(p.default_type); execute(p); @@ -1098,8 +1098,8 @@ TEST_P(conv_fp32_eltwise_fusing_2conv, basic) { concatenation("concat", { input_info("eltwise3"), input_info("eltwise3") }, 1), reorder("reorder_bfyx", input_info("concat"), p.default_format, data_types::f32) ); - implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim0", conv_impl }, { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim0", conv_impl }, { "conv_prim", conv_impl } })); tolerance = default_tolerance(p.default_type); execute(p); @@ -1135,8 +1135,8 @@ TEST_P(conv_fp32_multi_eltwise_3_fusing, basic) { eltwise("eltwise3", input_info("eltwise1"), input_info("eltwise2"), eltwise_mode::prod), reorder("reorder_bfyx", input_info("eltwise3"), p.default_format, data_types::f32) ); - implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "" }; + 
cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); tolerance = default_tolerance(p.default_type); execute(p); @@ -1208,8 +1208,8 @@ TEST_P(conv_fp32_multi_eltwise_concat, basic) { padding{ { 0, 0, 0, 0 }, 0 }), reorder("reorder_bfyx", input_info("concat"), p.default_format, data_types::f32) ); - implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); tolerance = default_tolerance(output_type); execute(p); @@ -1239,8 +1239,8 @@ TEST_P(conv_fp32_eltwise_b_fs_zyx_fsv16, vector_ops) { reorder("reorder_bfyx", input_info("eltwise"), p.default_format, data_types::f32) ); - implementation_desc conv_impl = { format::b_fs_zyx_fsv16, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_zyx_fsv16, "" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); tolerance = default_tolerance(p.default_type); execute(p); @@ -1292,8 +1292,8 @@ TEST_P(conv_fp32_eltwise_b_fs_zyx_fsv16, splitted_vector_ops) { reorder("reorder_bfyx", input_info("eltwise"), p.default_format, data_types::f32) ); - implementation_desc conv_impl = { format::b_fs_zyx_fsv16, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_zyx_fsv16, "" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); tolerance = default_tolerance(p.default_type); // commented because split mode is disabled @@ -1611,8 +1611,8 @@ class conv_fp32_group_conv_eltwise_sum : public ConvEltwTest {}; TEST_P(conv_fp32_group_conv_eltwise_sum, basic) { auto p = GetParam(); - implementation_desc conv_impl = { format::bfyx, "convolution_gpu_bfyx_os_iyx_osv16", impl_types::ocl }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::bfyx, "convolution_gpu_bfyx_os_iyx_osv16", impl_types::ocl }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); create_topologies( input_layout("input", get_input_layout(p)), @@ -1898,8 +1898,8 @@ TEST_P(conv_int8_prelu_eltwise, fsv16) { ); if (p.default_format.dimension() == 4) { - implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); } else { // TODO Add 5D int8 optimized convolution implementations return; @@ -1924,8 +1924,8 @@ TEST_P(conv_int8_prelu_eltwise, fsv16_slope_2) { ); if (p.default_format.dimension() == 4) { - implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "" }; + 
cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); } else { // TODO Add 5D int8 optimized convolution implementations return; @@ -1982,8 +1982,8 @@ TEST_P(conv_int8_activation_eltwise_quantize, fsv16) { ); if (p.default_format.dimension() == 4) { - implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); } else { // TODO Add 5D int8 optimized convolution implementations return; @@ -2013,8 +2013,8 @@ TEST_P(conv_int8_activation_eltwise_quantize, fsv32) { ); if (p.default_format.dimension() == 4) { - implementation_desc conv_impl = { format::b_fs_yx_fsv32, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv32, "" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); } else { // TODO Add 5D int8 optimized convolution implementations return; @@ -2054,8 +2054,8 @@ TEST_P(conv_int8_activation_eltwise, fsv16) { ); if (p.default_format.dimension() == 4) { - implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); } else { // TODO Add 5D int8 optimized convolution implementations return; @@ -2079,8 +2079,8 @@ TEST_P(conv_int8_activation_eltwise, fsv32) { ); if (p.default_format.dimension() == 4) { - implementation_desc conv_impl = { format::b_fs_yx_fsv32, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv32, "" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); } else { // TODO Add 5D int8 optimized convolution implementations return; @@ -2238,8 +2238,8 @@ TEST_P(conv_int8_scale_quantize_i8_conv_b_fs_yx_fsv4_int8, basic) { input_info("out_lo"), input_info("out_hi"), 255, data_types::i8), reorder("reorder_bfyx", input_info("quantize"), p.default_format, data_types::f32) ); - implementation_desc conv_impl = { format::b_fs_yx_fsv4, "convolution_gpu_b_fs_yx_fsv4_int8" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv4, "convolution_gpu_b_fs_yx_fsv4_int8" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); tolerance = 1.f; execute(p); @@ -2543,8 +2543,8 @@ TEST_P(conv_int8_scale_prelu_quantize_i8_eltwise_fp32_quantize_i8_vec, vector_op reorder("reorder_bfyx", input_info("quantize_1"), p.default_format, data_types::f32) ); - implementation_desc conv_impl = { format::b_fs_yx_fsv4, "convolution_gpu_b_fs_yx_fsv4_1x1" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv4, 
"convolution_gpu_b_fs_yx_fsv4_1x1" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); tolerance = 1.f; execute(p); @@ -2578,8 +2578,8 @@ TEST_P(conv_int8_scale_prelu_quantize_i8_eltwise_fp32_quantize_i8_vec, vector_op reorder("reorder_bfyx", input_info("quantize_1"), p.default_format, data_types::f32) ); - implementation_desc conv_impl = { format::b_fs_yx_fsv4, "convolution_gpu_b_fs_yx_fsv4_1x1" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv4, "convolution_gpu_b_fs_yx_fsv4_1x1" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); tolerance = 1.f; execute(p); @@ -2611,8 +2611,8 @@ TEST_P(conv_int8_asymmetric_weights, basic) { tolerance = 1.f; auto input_prim = get_mem(get_input_layout(p)); - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); @@ -2681,8 +2681,8 @@ TEST_P(conv_int8_asymmetric_data, basic) { tolerance = 1.f; auto input_prim = get_mem(get_input_layout(p)); - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); @@ -2753,8 +2753,8 @@ TEST_P(conv_int8_asymmetric_data_and_weights, basic) { tolerance = 1.f; auto input_prim = get_mem(get_input_layout(p)); - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); @@ -2900,8 +2900,8 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_basic, basic) { reorder("reorder_out", input_info("activation"), format::bfyx, data_types::f32) ); - implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); execute(p); } @@ -2925,8 +2925,8 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_mean, have_mean) { activation("activation", input_info("conv_prim"), activation_func::abs) ); - implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); 
execute(p); } @@ -2958,8 +2958,8 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_subtract, have_subtract_per_feature) convolution("conv_output", input_info("reorder_fsv32"), { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation) ); - implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); execute(p); } @@ -2988,9 +2988,9 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_fused_activation, have_fused_activat activation("activation", input_info("conv_prim2"), activation_func::abs) ); - implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim2", conv_impl } })); - bo_fused.set_option(build_option::force_implementations({ { "activation", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim2", conv_impl } })); + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "activation", conv_impl } })); execute(p); } @@ -3019,9 +3019,9 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_fused_through_activation, have_fused activation("activation", input_info("conv_prim2"), activation_func::abs) ); - implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim2", conv_impl } })); - bo_fused.set_option(build_option::force_implementations({ { "activation", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim2", conv_impl } })); + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "activation", conv_impl } })); execute(p, {{"conv_prim", {"activation_quantize"}}}); } @@ -3049,8 +3049,8 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_data_padding, have_data_padding) { reorder("reorder_out", input_info("conv_prim2"), format::fs_b_yx_fsv32, data_types::f32) ); - implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim2", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim2", conv_impl } })); execute(p); } @@ -3334,8 +3334,8 @@ TEST_P(conv_int8_activation_eltwise_quantize_onednn, bsv32_fsv32) { reorder("reorder_bfyx", input_info("quantize"), p.default_format, data_types::f32) ); - implementation_desc conv_impl = { format::bs_fs_yx_bsv32_fsv32, "", impl_types::onednn }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::bs_fs_yx_bsv32_fsv32, "", impl_types::onednn }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); tolerance = 1.f; execute(p); @@ -3383,8 +3383,8 @@ TEST_P(conv_int8_scale_shift_swish_onednn, bsv32_fsv32) { reorder("reorder_bfyx", input_info("shift1"), p.default_format, 
data_types::f32) ); - implementation_desc conv_impl = { format::bs_fs_yx_bsv32_fsv32, "", impl_types::onednn }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::bs_fs_yx_bsv32_fsv32, "", impl_types::onednn }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); tolerance = 1.f; execute(p); @@ -3408,6 +3408,9 @@ class conv_int8_eltwise_scale_onednn : public WeightsPrimitiveFusingTestOneDNN { TEST_P(conv_int8_eltwise_scale_onednn, u8_eltwise_prod_out_reuse) { auto p = GetParam(); + if (!engine.get_device_info().supports_immad) + return; + create_topologies( input_layout("input", get_input_layout(p)), data("weights", get_mem(get_weights_layout(p), -2, 2)), @@ -3425,11 +3428,11 @@ TEST_P(conv_int8_eltwise_scale_onednn, u8_eltwise_prod_out_reuse) { auto input_prim = get_mem(get_input_layout(p)); auto forcing_format = p.input_format; - implementation_desc conv_impl = { forcing_format, "", impl_types::onednn }; - bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } })); + ov::intel_gpu::ImplementationDesc conv_impl = { forcing_format, "", impl_types::onednn }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); @@ -3839,8 +3842,8 @@ public: return; auto input_prim = p.data_type == data_types::u8 ? get_mem(get_input_layout(p), 0, 10) : get_mem(get_input_layout(p)); - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); @@ -3970,15 +3973,15 @@ public: void execute(implicit_crop_concat_convolution_test_params& p) { auto input_prim = p.data_type == data_types::u8 ? 
get_mem(get_input_layout(p), 0, 10) : get_mem(get_input_layout(p)); - bo_not_fused = bo_fused; - // implementation_desc quantize_impl = { p.output_format, "quantize_gpu_ref", impl_types::ocl }; - bo_not_fused.set_option(build_option::force_implementations({ + cfg_not_fused = cfg_fused; + // ov::intel_gpu::ImplementationDesc quantize_impl = { p.output_format, "quantize_gpu_ref", impl_types::ocl }; + cfg_not_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "quantize1", { p.output_format, "quantize_gpu_scale_shift_opt", impl_types::ocl } }, { "quantize2", { p.output_format, "quantize_gpu_scale_shift_opt", impl_types::ocl } }, { "quantize3", { p.output_format, "quantize_gpu_scale_shift_opt", impl_types::ocl } } })); - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); compare(network_not_fused, network_fused, p); diff --git a/src/plugins/intel_gpu/tests/fusions/deconvolution_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/deconvolution_fusion_test.cpp index 0d15bf8e677..f25135080b7 100644 --- a/src/plugins/intel_gpu/tests/fusions/deconvolution_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/deconvolution_fusion_test.cpp @@ -65,8 +65,8 @@ public: if (engine.get_device_info().supports_immad) p.expected_fused_primitives = p.expected_fused_primitives_onednn; - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); @@ -102,8 +102,8 @@ public: p.expected_fused_primitives = p.expected_fused_primitives_onednn; auto input_prim = get_mem(get_input_layout(p)); - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); diff --git a/src/plugins/intel_gpu/tests/fusions/depth_to_space_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/depth_to_space_fusion_test.cpp index f67b32d9f15..cec2cb741ef 100644 --- a/src/plugins/intel_gpu/tests/fusions/depth_to_space_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/depth_to_space_fusion_test.cpp @@ -35,8 +35,8 @@ public: void execute(depth_to_space_test_params& p) { auto input_prim = get_mem(get_input_layout(p)); - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); diff --git 
a/src/plugins/intel_gpu/tests/fusions/eltwise_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/eltwise_fusion_test.cpp index f31aa859315..b90e0c2ad9f 100644 --- a/src/plugins/intel_gpu/tests/fusions/eltwise_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/eltwise_fusion_test.cpp @@ -34,8 +34,8 @@ public: auto input_prim = get_mem(get_input_layout(p)); auto input_prim2 = get_mem(get_input_layout2(p)); - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); auto inputs = network_fused.get_input_ids(); network_fused.set_input_data("input", input_prim); @@ -222,8 +222,8 @@ TEST_P(eltwise_fp32_fsv16, add) { if (engine.get_device_info().supports_immad) p.expected_fused_primitives++; - implementation_desc eltw_impl = { format::b_fs_yx_fsv16, "eltwise_b_fs_yx_fsv16" }; - bo_fused.set_option(build_option::force_implementations({ { "eltwise", eltw_impl } })); + ov::intel_gpu::ImplementationDesc eltw_impl = { format::b_fs_yx_fsv16, "eltwise_b_fs_yx_fsv16" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "eltwise", eltw_impl } })); tolerance = 1e-5f; execute(p); @@ -244,8 +244,8 @@ TEST_P(eltwise_fp32_fsv16, add_per_element) { if (engine.get_device_info().supports_immad) p.expected_fused_primitives++; - implementation_desc eltw_impl = { format::b_fs_yx_fsv16, "eltwise_b_fs_yx_fsv16" }; - bo_fused.set_option(build_option::force_implementations({ { "eltwise", eltw_impl } })); + ov::intel_gpu::ImplementationDesc eltw_impl = { format::b_fs_yx_fsv16, "eltwise_b_fs_yx_fsv16" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "eltwise", eltw_impl } })); tolerance = 1e-5f; execute(p); @@ -268,8 +268,8 @@ TEST_P(eltwise_fp32_fsv16, add_broadcast) { if (engine.get_device_info().supports_immad) p.expected_fused_primitives++; - implementation_desc eltw_impl = { format::b_fs_yx_fsv16, "eltwise_b_fs_yx_fsv16" }; - bo_fused.set_option(build_option::force_implementations({ { "eltwise", eltw_impl } })); + ov::intel_gpu::ImplementationDesc eltw_impl = { format::b_fs_yx_fsv16, "eltwise_b_fs_yx_fsv16" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "eltwise", eltw_impl } })); tolerance = 1e-5f; execute(p); @@ -296,8 +296,8 @@ TEST_P(eltwise_fp32_fsv32, add) { if (engine.get_device_info().supports_immad) p.expected_fused_primitives++; - implementation_desc eltw_impl = { format::fs_b_yx_fsv32, "eltwise_fs_b_yx_fsv32" }; - bo_fused.set_option(build_option::force_implementations({ { "eltwise", eltw_impl } })); + ov::intel_gpu::ImplementationDesc eltw_impl = { format::fs_b_yx_fsv32, "eltwise_fs_b_yx_fsv32" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "eltwise", eltw_impl } })); tolerance = 1e-5f; execute(p); @@ -318,8 +318,8 @@ TEST_P(eltwise_fp32_fsv32, add_per_element) { if (engine.get_device_info().supports_immad) p.expected_fused_primitives++; - implementation_desc eltw_impl = { format::fs_b_yx_fsv32, "eltwise_fs_b_yx_fsv32" }; - bo_fused.set_option(build_option::force_implementations({ { "eltwise", eltw_impl } })); + ov::intel_gpu::ImplementationDesc eltw_impl = { format::fs_b_yx_fsv32, "eltwise_fs_b_yx_fsv32" }; + 
cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "eltwise", eltw_impl } })); tolerance = 1e-5f; execute(p); @@ -347,8 +347,8 @@ TEST_P(eltwise_fp32_fsv4, add) { if (engine.get_device_info().supports_immad) p.expected_fused_primitives++; - implementation_desc eltw_impl = { format::b_fs_yx_fsv4, "eltwise_b_fs_yx_fsv4" }; - bo_fused.set_option(build_option::force_implementations({ { "eltwise", eltw_impl } })); + ov::intel_gpu::ImplementationDesc eltw_impl = { format::b_fs_yx_fsv4, "eltwise_b_fs_yx_fsv4" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "eltwise", eltw_impl } })); tolerance = 1e-5f; execute(p); @@ -369,8 +369,8 @@ TEST_P(eltwise_fp32_fsv4, add_per_element) { if (engine.get_device_info().supports_immad) p.expected_fused_primitives++; - implementation_desc eltw_impl = { format::b_fs_yx_fsv4, "eltwise_b_fs_yx_fsv4" }; - bo_fused.set_option(build_option::force_implementations({ { "eltwise", eltw_impl } })); + ov::intel_gpu::ImplementationDesc eltw_impl = { format::b_fs_yx_fsv4, "eltwise_b_fs_yx_fsv4" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "eltwise", eltw_impl } })); tolerance = 1e-5f; execute(p); @@ -500,8 +500,8 @@ TEST_P(eltwise_fp16_byxf, add) { if (engine.get_device_info().supports_immad) p.expected_fused_primitives++; - implementation_desc eltw_impl = { format::byxf, "generic_eltwise_ref" }; - bo_fused.set_option(build_option::force_implementations({ { "eltwise", eltw_impl } })); + ov::intel_gpu::ImplementationDesc eltw_impl = { format::byxf, "generic_eltwise_ref" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "eltwise", eltw_impl } })); tolerance = 1e-5f; execute(p); diff --git a/src/plugins/intel_gpu/tests/fusions/fully_connected_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/fully_connected_fusion_test.cpp index 10692ed12ee..3af55733060 100644 --- a/src/plugins/intel_gpu/tests/fusions/fully_connected_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/fully_connected_fusion_test.cpp @@ -36,11 +36,11 @@ class FullyConnectedFusingTest : public ::BaseFusingTestget_mem(get_input_layout(p)); - network network_not_fused(this->engine, this->topology_non_fused, this->bo_not_fused); - network network_fused(this->engine, this->topology_fused, this->bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, this->cfg_not_fused); + network network_fused(this->engine, this->topology_fused, this->cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); @@ -80,19 +80,17 @@ public: auto input_prim = p.data_type == data_types::u8 ? 
get_mem(get_input_layout(p), 0, 10) : get_mem(get_input_layout(p)); - auto impl_forcing_bo = bo_fused.get(); - const auto& impl_forcing = impl_forcing_bo->forcing; + auto impl_forcing = cfg_fused.get_property(ov::intel_gpu::force_implementations); auto forcing_format = p.input_format; for (auto& forcing : impl_forcing) if (forcing.first == "fc_prim") forcing_format = forcing.second.output_format; - implementation_desc conv_impl = { forcing_format, "", impl_types::onednn }; - bo_fused.set_option(build_option::force_implementations({ { "fc_prim", conv_impl } })); - - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + ov::intel_gpu::ImplementationDesc conv_impl = { forcing_format, "", impl_types::onednn }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "fc_prim", conv_impl } })); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); diff --git a/src/plugins/intel_gpu/tests/fusions/fusion_test_common.hpp b/src/plugins/intel_gpu/tests/fusions/fusion_test_common.hpp index c640a03f496..61d25997aaf 100644 --- a/src/plugins/intel_gpu/tests/fusions/fusion_test_common.hpp +++ b/src/plugins/intel_gpu/tests/fusions/fusion_test_common.hpp @@ -18,15 +18,12 @@ using namespace ::tests; template class BaseFusingTest : public ::testing::TestWithParam { public: -#ifdef ENABLE_ONEDNN_FOR_GPU - cldnn::engine& engine = get_onednn_test_engine(); -#else cldnn::engine& engine = get_test_engine(); -#endif cldnn::topology topology_fused; cldnn::topology topology_non_fused; - cldnn::build_options bo_fused; - cldnn::build_options bo_not_fused; + + ExecutionConfig cfg_fused; + ExecutionConfig cfg_not_fused; float tolerance = 0.0f; @@ -34,9 +31,9 @@ public: static const int max_random = 200; void SetUp() override { - bo_fused.set_option(build_option::optimize_data(true)); - bo_not_fused.set_option(build_option::optimize_data(false)); - bo_not_fused.set_option(build_option::allow_static_input_reorder(true)); + cfg_fused.set_property(ov::intel_gpu::optimize_data(true)); + cfg_not_fused.set_property(ov::intel_gpu::optimize_data(false)); + cfg_not_fused.set_property(ov::intel_gpu::allow_static_input_reorder(true)); } void compare(network& not_fused, network& fused, T& p, bool count_reorder = false) { diff --git a/src/plugins/intel_gpu/tests/fusions/fusion_validity_test.cpp b/src/plugins/intel_gpu/tests/fusions/fusion_validity_test.cpp index 2667c6afb27..6198b5fb50d 100644 --- a/src/plugins/intel_gpu/tests/fusions/fusion_validity_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/fusion_validity_test.cpp @@ -39,10 +39,10 @@ class PrimitiveFusingTest : public ::BaseFusingTest { public: void execute(fusing_test_params& p) { - bo_fused.set_option(build_option::allow_static_input_reorder(true)); + cfg_fused.set_property(ov::intel_gpu::allow_static_input_reorder(true)); auto input_prim = get_mem(get_input_layout(p)); - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); 
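The fully-connected oneDNN hunk above also shows the read side of the new API: instead of pulling an option object out of build_options, the test queries the config for the typed property value. A short sketch of that round trip, assuming the same headers and namespaces as above; the primitive name "fc_prim" is taken from the test, the format is a placeholder:

    ExecutionConfig cfg;
    cfg.set_property(ov::intel_gpu::force_implementations(
        ov::intel_gpu::ImplForcingMap{ { "fc_prim", { format::bfyx, "" } } }));

    // get_property returns the stored ImplForcingMap for the force_implementations key.
    auto impl_forcing = cfg.get_property(ov::intel_gpu::force_implementations);
    for (auto& forcing : impl_forcing) {
        if (forcing.first == "fc_prim") {
            // Format requested for that primitive; the test reuses it when re-forcing the impl.
            auto forced_format = forcing.second.output_format;
        }
    }
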
network_not_fused.set_input_data("input", input_prim); @@ -78,12 +78,12 @@ TEST_P(format_mismatch_fusing, single_fused_node) { reorder("reorder_bfyx", input_info("eltwise"), p.output_format, data_types::f32) ); - implementation_desc resample_impl = { p.input_format, "resample_opt" }; - bo_fused.set_option(build_option::force_implementations({ { "resample_opt", resample_impl } })); - implementation_desc ref_resample_impl = { p.input_format, "resample_ref" }; - bo_not_fused.set_option(build_option::force_implementations({ { "resample_opt", ref_resample_impl } })); - implementation_desc ref_eltwise = { p.input_format, "" }; - bo_not_fused.set_option(build_option::force_implementations({ { "eltwise_data", ref_eltwise } })); + ov::intel_gpu::ImplementationDesc resample_impl = { p.input_format, "resample_opt" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "resample_opt", resample_impl } })); + ov::intel_gpu::ImplementationDesc ref_resample_impl = { p.input_format, "resample_ref" }; + cfg_not_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "resample_opt", ref_resample_impl } })); + ov::intel_gpu::ImplementationDesc ref_eltwise = { p.input_format, "" }; + cfg_not_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "eltwise_data", ref_eltwise } })); tolerance = 1e-5f; execute(p); @@ -112,12 +112,12 @@ TEST_P(format_mismatch_multiple_fusing, multiple_fused_node) { reorder("reorder_bfyx", input_info("eltwise"), p.default_format, data_types::f32) ); - implementation_desc resample_impl = { p.input_format, "resample_opt" }; - bo_fused.set_option(build_option::force_implementations({ { "resample_prim", resample_impl } })); - implementation_desc ref_resample_impl = { p.input_format, "resample_ref" }; - bo_not_fused.set_option(build_option::force_implementations({ { "resample_prim", ref_resample_impl } })); - implementation_desc ref_eltwise = { p.input_format, "" }; - bo_not_fused.set_option(build_option::force_implementations({ { "eltwise_data", ref_eltwise } })); + ov::intel_gpu::ImplementationDesc resample_impl = { p.input_format, "resample_opt" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "resample_prim", resample_impl } })); + ov::intel_gpu::ImplementationDesc ref_resample_impl = { p.input_format, "resample_ref" }; + cfg_not_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "resample_prim", ref_resample_impl } })); + ov::intel_gpu::ImplementationDesc ref_eltwise = { p.input_format, "" }; + cfg_not_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "eltwise_data", ref_eltwise } })); tolerance = 1e-5f; execute(p); diff --git a/src/plugins/intel_gpu/tests/fusions/gather_elements_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/gather_elements_fusion_test.cpp index 426d0cc59c4..109fb1816ca 100644 --- a/src/plugins/intel_gpu/tests/fusions/gather_elements_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/gather_elements_fusion_test.cpp @@ -42,8 +42,8 @@ class GatherElementsPrimitiveFusingTest : public ::BaseFusingTestengine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); 
network_not_fused.set_input_data("input", input_prim); compare(network_not_fused, network_fused, p); diff --git a/src/plugins/intel_gpu/tests/fusions/gather_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/gather_fusion_test.cpp index b9ccc2cc5ef..486e3bb2ebb 100644 --- a/src/plugins/intel_gpu/tests/fusions/gather_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/gather_fusion_test.cpp @@ -34,8 +34,8 @@ class GatherPrimitiveFusingTest : public ::BaseFusingTest { public: void execute(gather_test_params& p) { auto input_prim = get_mem(get_input_layout(p)); - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); diff --git a/src/plugins/intel_gpu/tests/fusions/gather_nd_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/gather_nd_fusion_test.cpp index b5f03816294..25198bd3825 100644 --- a/src/plugins/intel_gpu/tests/fusions/gather_nd_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/gather_nd_fusion_test.cpp @@ -44,8 +44,8 @@ class GatherNDPrimitiveFusingTest : public ::BaseFusingTestengine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); compare(network_not_fused, network_fused, p); diff --git a/src/plugins/intel_gpu/tests/fusions/gemm_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/gemm_fusion_test.cpp index 04d6bdddffc..03467c07fad 100644 --- a/src/plugins/intel_gpu/tests/fusions/gemm_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/gemm_fusion_test.cpp @@ -41,14 +41,14 @@ public: auto input1_prim = get_mem(get_input_layout(p, 1)); if (!p.kernel_name.empty()) { - implementation_desc gemm_ref_impl = { format::bfyx, "gemm_ref" }; - implementation_desc gemm_target_impl = { format::bfyx, p.kernel_name }; - bo_fused.set_option(build_option::force_implementations({ {"gemm_prim", gemm_target_impl} })); - bo_not_fused.set_option(build_option::force_implementations({ {"gemm_prim", gemm_ref_impl} })); + ov::intel_gpu::ImplementationDesc gemm_ref_impl = { format::bfyx, "gemm_ref" }; + ov::intel_gpu::ImplementationDesc gemm_target_impl = { format::bfyx, p.kernel_name }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"gemm_prim", gemm_target_impl} })); + cfg_not_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"gemm_prim", gemm_ref_impl} })); } - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input0", input0_prim); network_not_fused.set_input_data("input0", input0_prim); network_fused.set_input_data("input1", input1_prim); @@ -207,8 +207,8 @@ TEST_P(gemm_2in_quantize_float_in, basic) { reorder("reorder_bfyx", 
input_info("quantize"), p.default_format, data_types::f32) ); - implementation_desc gemm_impl = { format::bfyx, "gemm_tiled_opt" }; - bo_fused.set_option(build_option::force_implementations({ { "gemm_prim", gemm_impl } })); + ov::intel_gpu::ImplementationDesc gemm_impl = { format::bfyx, "gemm_tiled_opt" }; + cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "gemm_prim", gemm_impl } })); tolerance = default_tolerance(data_types::u8); execute(p); diff --git a/src/plugins/intel_gpu/tests/fusions/loop_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/loop_fusion_test.cpp index af19722e0a5..6e980ac7d25 100644 --- a/src/plugins/intel_gpu/tests/fusions/loop_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/loop_fusion_test.cpp @@ -37,8 +37,8 @@ public: void execute(loop_params& p) { auto input_prim = get_mem(get_input_layout(p)); - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); diff --git a/src/plugins/intel_gpu/tests/fusions/lrn_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/lrn_fusion_test.cpp index 8eee6369226..1a97173480e 100644 --- a/src/plugins/intel_gpu/tests/fusions/lrn_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/lrn_fusion_test.cpp @@ -35,12 +35,12 @@ public: void execute(lrn_test_params& p) { auto input_prim = get_mem(get_input_layout(p)); - build_options options; - implementation_desc lrn_impl = { p.input_format, p.kernel_name }; - options.set_option(build_option::optimize_data(true)); - options.set_option(build_option::force_implementations({ { "lrn_norm", lrn_impl } })); - network network_fused(this->engine, this->topology_fused, options); - network network_not_fused(this->engine, this->topology_non_fused, this->bo_not_fused); + ExecutionConfig config; + ov::intel_gpu::ImplementationDesc lrn_impl = { p.input_format, p.kernel_name }; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "lrn_norm", lrn_impl } })); + network network_fused(this->engine, this->topology_fused, config); + network network_not_fused(this->engine, this->topology_non_fused, this->cfg_not_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); diff --git a/src/plugins/intel_gpu/tests/fusions/mvn_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/mvn_fusion_test.cpp index 1efbed06fcc..97f187d01f6 100644 --- a/src/plugins/intel_gpu/tests/fusions/mvn_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/mvn_fusion_test.cpp @@ -39,8 +39,8 @@ public: p.expected_fused_primitives = p.expected_fused_primitives_onednn; auto input_prim = get_mem(get_input_layout(p)); - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); diff --git 
a/src/plugins/intel_gpu/tests/fusions/normalize_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/normalize_fusion_test.cpp index f1b7e5337ca..3647f9e23c3 100644 --- a/src/plugins/intel_gpu/tests/fusions/normalize_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/normalize_fusion_test.cpp @@ -47,8 +47,8 @@ class NormalizeFusingTest : public ::BaseFusingTest { public: void execute(normalize_test_params& p) { auto input_prim = get_mem(get_input_layout(p)); - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); diff --git a/src/plugins/intel_gpu/tests/fusions/permute_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/permute_fusion_test.cpp index a2bd2e5a21b..fa9c0b9f7d0 100644 --- a/src/plugins/intel_gpu/tests/fusions/permute_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/permute_fusion_test.cpp @@ -47,8 +47,8 @@ public: void execute(permute_params& p) { auto input_prim = get_mem(get_input_layout(p)); - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); @@ -69,8 +69,8 @@ public: void execute(permute_reorder_params& p) { auto input_prim = get_mem(get_input_layout(p)); - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); compare(network_not_fused, network_fused, p, true); diff --git a/src/plugins/intel_gpu/tests/fusions/pooling_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/pooling_fusion_test.cpp index 1952c2affdd..6751184c2de 100644 --- a/src/plugins/intel_gpu/tests/fusions/pooling_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/pooling_fusion_test.cpp @@ -36,14 +36,14 @@ public: if (engine.get_device_info().supports_immad) p.expected_fused_primitives = p.expected_fused_primitives_onednn; auto input_prim = get_mem(get_input_layout(p)); - build_options options; - options.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); if (!p.kernel_name.empty()) { - implementation_desc impl = { p.input_format, p.kernel_name }; - options.set_option(build_option::force_implementations({ { "pooling", impl } })); + ov::intel_gpu::ImplementationDesc impl = { p.input_format, p.kernel_name }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "pooling", impl } })); } - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, options); + network network_not_fused(this->engine, this->topology_non_fused, 
cfg_not_fused); + network network_fused(this->engine, this->topology_fused, config); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); @@ -537,20 +537,19 @@ public: auto input_prim = get_mem(get_input_layout(p)); - build_options onednn_options; - build_options cldnn_options; + ov::intel_gpu::ImplementationDesc onednn_impl = { p.input_format, "", impl_types::onednn }; + ov::intel_gpu::ImplementationDesc cldnn_impl = { p.input_format, "", impl_types::ocl }; - onednn_options.set_option(build_option::optimize_data(true)); - cldnn_options.set_option(build_option::optimize_data(true)); - - implementation_desc onednn_impl = { p.input_format, "", impl_types::onednn }; - implementation_desc cldnn_impl = { p.input_format, "", impl_types::ocl }; - onednn_options.set_option(build_option::force_implementations({ { "pooling", onednn_impl } })); - cldnn_options.set_option(build_option::force_implementations({ { "pooling", cldnn_impl } })); + ExecutionConfig cldnn_cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), + ov::intel_gpu::optimize_data(true), + ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "pooling", cldnn_impl } })}; + ExecutionConfig onednn_cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), + ov::intel_gpu::optimize_data(true), + ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "pooling", onednn_impl } })}; // for onednn fusing test, topology_non_fused means cldnn, topology_fused is onednn - network network_fused_cldnn(this->engine, this->topology_non_fused, cldnn_options); - network network_fused_onednn(this->engine, this->topology_fused, onednn_options); + network network_fused_cldnn(this->engine, this->topology_non_fused, cldnn_cfg); + network network_fused_onednn(this->engine, this->topology_fused, onednn_cfg); network_fused_cldnn.set_input_data("input", input_prim); network_fused_onednn.set_input_data("input", input_prim); diff --git a/src/plugins/intel_gpu/tests/fusions/reduce_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/reduce_fusion_test.cpp index ff91c04a937..d56184c123b 100644 --- a/src/plugins/intel_gpu/tests/fusions/reduce_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/reduce_fusion_test.cpp @@ -37,8 +37,8 @@ public: void execute(reduce_test_params& p) { auto input_prim = get_mem(get_input_layout(p)); - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); diff --git a/src/plugins/intel_gpu/tests/fusions/resample_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/resample_fusion_test.cpp index 3f8893100a9..9d4475461d8 100644 --- a/src/plugins/intel_gpu/tests/fusions/resample_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/resample_fusion_test.cpp @@ -35,8 +35,8 @@ public: void execute(resample_test_params& p, std::map> expected_fused_primitives_ids = {}) { auto input_prim = get_mem(get_input_layout(p)); - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, 
this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); diff --git a/src/plugins/intel_gpu/tests/fusions/scatter_elements_update_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/scatter_elements_update_fusion_test.cpp index 167d831e866..b72713b18e3 100644 --- a/src/plugins/intel_gpu/tests/fusions/scatter_elements_update_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/scatter_elements_update_fusion_test.cpp @@ -34,8 +34,8 @@ public: void execute(scatter_elements_update_test_params& p) { auto input_prim = get_mem(get_input_layout(p), -5, 5); - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); compare(network_not_fused, network_fused, p); diff --git a/src/plugins/intel_gpu/tests/fusions/scatter_nd_update_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/scatter_nd_update_fusion_test.cpp index c4de8934bc4..139bc10fb31 100644 --- a/src/plugins/intel_gpu/tests/fusions/scatter_nd_update_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/scatter_nd_update_fusion_test.cpp @@ -37,8 +37,8 @@ class ScatterNDUpdatePrimitiveFusingTest : public ::BaseFusingTestengine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); compare(network_not_fused, network_fused, p); diff --git a/src/plugins/intel_gpu/tests/fusions/scatter_update_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/scatter_update_fusion_test.cpp index 26ae63d7397..9a2fc1f3bdb 100644 --- a/src/plugins/intel_gpu/tests/fusions/scatter_update_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/scatter_update_fusion_test.cpp @@ -34,8 +34,8 @@ class ScatterUpdatePrimitiveFusingTest : public ::BaseFusingTestengine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); compare(network_not_fused, network_fused, p); diff --git a/src/plugins/intel_gpu/tests/fusions/softmax_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/softmax_fusion_test.cpp index 61c1ee87552..342841af66a 100644 --- a/src/plugins/intel_gpu/tests/fusions/softmax_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/softmax_fusion_test.cpp @@ -34,8 +34,8 @@ public: void execute(softmax_test_params& p, std::map> expected_fused_primitives_ids = {}) { auto input_prim = get_mem(get_input_layout(p)); - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, 
this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); diff --git a/src/plugins/intel_gpu/tests/fusions/space_to_batch_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/space_to_batch_fusion_test.cpp index 80721b2dc4a..a27e0132564 100644 --- a/src/plugins/intel_gpu/tests/fusions/space_to_batch_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/space_to_batch_fusion_test.cpp @@ -36,8 +36,8 @@ public: void execute(space_to_batch_test_params& p) { auto input_prim = get_mem(get_input_layout(p)); - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); diff --git a/src/plugins/intel_gpu/tests/fusions/space_to_depth_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/space_to_depth_fusion_test.cpp index b91cd5d99c4..00b7fedf67c 100644 --- a/src/plugins/intel_gpu/tests/fusions/space_to_depth_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/space_to_depth_fusion_test.cpp @@ -35,8 +35,8 @@ public: void execute(space_to_depth_params& p) { auto input_prim = get_mem(get_input_layout(p)); - network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); - network network_fused(this->engine, this->topology_fused, bo_fused); + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); network_fused.set_input_data("input", input_prim); network_not_fused.set_input_data("input", input_prim); diff --git a/src/plugins/intel_gpu/tests/module_tests/graph_manipulation_gpu_test.cpp b/src/plugins/intel_gpu/tests/module_tests/graph_manipulation_gpu_test.cpp index 4ef6ee537ad..9b96ff39643 100644 --- a/src/plugins/intel_gpu/tests/module_tests/graph_manipulation_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/module_tests/graph_manipulation_gpu_test.cpp @@ -28,8 +28,8 @@ using namespace ::tests; in similar way as it is done in tests utilizing clDNN API */ TEST(basic, test1) { auto& engine = get_test_engine(); - build_options build_opt; - build_opt.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); auto weights1 = engine.allocate_memory({ data_types::f16, format::yxfb,{ 1, 1, 2, 1 } }); @@ -49,7 +49,7 @@ TEST(basic, test1) { topology.add(concatenation("concat", { input_info("reorder1"), input_info("weights2") }, 3)); topology.add(convolution("conv2", { input_info("reorder2") }, { "concat" })); - program::ptr prog = program::build_program(engine, topology, build_opt, false); + program::ptr prog = program::build_program(engine, topology, config, false); network::ptr network = network::allocate_network(engine, prog); network->set_input_data("input", input); @@ -67,7 +67,7 @@ TEST(basic, test1) { // Thus, a single method from program like add_intermediate might be tested separately. 
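The module tests above push the same config one level lower by handing it straight to program::build_program. A condensed sketch of that flow, assuming an engine, a populated topology, and an input memory named "input" as in TEST(basic, test1); the trailing flag of build_program mirrors the call in that test:

    ExecutionConfig config;
    config.set_property(ov::intel_gpu::optimize_data(true));

    // Build the program with the config, then allocate and run a network from it.
    program::ptr prog = program::build_program(engine, topology, config, false);
    network::ptr net = network::allocate_network(engine, prog);

    net->set_input_data("input", input);
    auto outputs = net->execute();
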
TEST(add_intermediate_gpu, test1) { - build_options build_opt; + ExecutionConfig config; topology topology; auto& engine = get_test_engine(); @@ -92,7 +92,7 @@ TEST(add_intermediate_gpu, test1) topology.add(cldnn::convolution("conv1b", { input_info("input") }, { "weights" })); topology.add(cldnn::convolution("conv2a", { input_info("conv1a") }, { "weights2" })); auto new_reorder = std::make_shared("reorder", input_info("nothing"), input->get_layout()); - program::ptr prog = program::build_program(engine, topology, build_opt, false, true); + program::ptr prog = program::build_program(engine, topology, config, false, true); prog->add_intermediate(new_reorder, prog->get_node("conv1a"), 0); prog->dump_program("custom_dump", true); @@ -124,7 +124,7 @@ TEST(add_intermediate_gpu, test1) // Disabled for now as it produces wrong results TEST(add_intermediate_gpu, test2) { - build_options build_opt; + ExecutionConfig config; topology topology; auto& engine = get_test_engine(); @@ -153,7 +153,7 @@ TEST(add_intermediate_gpu, test2) w_vec.push_back("weights"); auto new_conv = std::make_shared("conv1a", input_info("input"), w_vec); auto weights_node = std::make_shared("weights", weights); - program::ptr prog = program::build_program(engine, topology, build_opt, false, true); + program::ptr prog = program::build_program(engine, topology, config, false, true); prog->add_intermediate(new_conv, prog->get_node("conv2a"), 0, true, true); program_wrapper::add_connection(*prog, prog->get_or_create(weights_node), prog->get_or_create(new_conv)); diff --git a/src/plugins/intel_gpu/tests/module_tests/usm_memory_test.cpp b/src/plugins/intel_gpu/tests/module_tests/usm_memory_test.cpp index 3c52be6f9a3..8116d22fd05 100644 --- a/src/plugins/intel_gpu/tests/module_tests/usm_memory_test.cpp +++ b/src/plugins/intel_gpu/tests/module_tests/usm_memory_test.cpp @@ -111,7 +111,7 @@ TEST_P(copy_and_read_buffer, basic) { return; } try { - ocl::ocl_stream stream(*_engine); + ocl::ocl_stream stream(*_engine, {}); size_t values_count = 100; size_t values_bytes_count = values_count * sizeof(float); @@ -184,7 +184,7 @@ TEST_P(fill_buffer, DISABLED_basic) { return; } try { - ocl::ocl_stream stream(*_engine); + ocl::ocl_stream stream(*_engine, {}); auto queue = stream.get_cl_queue(); auto usm_helper = stream.get_usm_helper(); diff --git a/src/plugins/intel_gpu/tests/passes/prepare_buffer_fusing_test.cpp b/src/plugins/intel_gpu/tests/passes/prepare_buffer_fusing_test.cpp index 92c775a430a..7652fb03b84 100644 --- a/src/plugins/intel_gpu/tests/passes/prepare_buffer_fusing_test.cpp +++ b/src/plugins/intel_gpu/tests/passes/prepare_buffer_fusing_test.cpp @@ -34,9 +34,9 @@ TEST(prepare_buffer_fusing, optimize_reshape) { topology.add(permute("permute2", input_info("reshape"), {0, 3, 2, 1})); topology.add(reorder("reorder", input_info("permute2"), format::bfyx, data_types::f32)); - build_options build_opts; - build_opts.set_option(build_option::allow_new_shape_infer(true)); - auto prog = program::build_program(engine, topology, build_opts, false, true); + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + auto prog = program::build_program(engine, topology, config, false, true); program_wrapper::apply_opt_pass(*prog); @@ -76,9 +76,9 @@ TEST(prepare_buffer_fusing, static_node_after_optimized_out_dyn_reshape) { topology.add(fully_connected("fc", input_info("reshape"), "weights", "", {}, 2)); topology.add(reorder("reorder", input_info("fc"), format::bfyx, data_types::f32)); - build_options build_opts; - 
build_opts.set_option(build_option::allow_new_shape_infer(true)); - auto prog = program::build_program(engine, topology, build_opts, false, true); + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + auto prog = program::build_program(engine, topology, config, false, true); ASSERT_NE(prog, nullptr); prog->get_node("reorder").get_output_layout(true); diff --git a/src/plugins/intel_gpu/tests/passes/prepare_primitive_fusing_test.cpp b/src/plugins/intel_gpu/tests/passes/prepare_primitive_fusing_test.cpp index ecec3845a10..b598677fb57 100644 --- a/src/plugins/intel_gpu/tests/passes/prepare_primitive_fusing_test.cpp +++ b/src/plugins/intel_gpu/tests/passes/prepare_primitive_fusing_test.cpp @@ -33,9 +33,9 @@ TEST(prepare_primitive_fusing, fuse_activation_to_fc_dyn) { topology.add(activation("act", input_info("fc"), activation_func::relu)); topology.add(reorder("reorder", input_info("act"), format::bfyx, data_types::f32)); - build_options build_opts; - build_opts.set_option(build_option::allow_new_shape_infer(true)); - auto prog = program::build_program(engine, topology, build_opts, false, true); + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + auto prog = program::build_program(engine, topology, config, false, true); layout_optimizer lo(true); @@ -59,9 +59,9 @@ TEST(prepare_primitive_fusing, dont_fuse_incompatible_eltwise) { topology.add(eltwise("eltw", { input_info("input"), input_info("reduce") }, eltwise_mode::sum)); topology.add(reorder("reorder", input_info("eltw"), format::bfyx, data_types::f32)); - build_options build_opts; - build_opts.set_option(build_option::allow_new_shape_infer(true)); - auto prog = program::build_program(engine, topology, build_opts, false, true); + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + auto prog = program::build_program(engine, topology, config, false, true); layout_optimizer lo(true); @@ -85,10 +85,10 @@ TEST(prepare_primitive_fusing, fuse_eltwise_to_fc_dyn_legal) { topology.add(eltwise("eltw", { input_info("fc"), input_info("extra_input") }, eltwise_mode::sum)); topology.add(reorder("reorder", input_info("eltw"), format::bfyx, data_types::f32)); - build_options build_opts; - build_opts.set_option(build_option::optimize_data(true)); - build_opts.set_option(build_option::allow_new_shape_infer(true)); - auto prog = program::build_program(engine, topology, build_opts, false, true); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + auto prog = program::build_program(engine, topology, config, false, true); layout_optimizer lo(true); @@ -127,10 +127,10 @@ TEST(prepare_primitive_fusing, fuse_eltwise_to_fc_dyn_illegal) { topology.add(eltwise("eltw", { input_info("fc"), input_info("extra_input")}, eltwise_mode::sum)); topology.add(reorder("reorder", input_info("eltw"), format::bfyx, data_types::f32)); - build_options build_opts; - build_opts.set_option(build_option::optimize_data(true)); - build_opts.set_option(build_option::allow_new_shape_infer(true)); - auto prog = program::build_program(engine, topology, build_opts, false, true); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + auto prog = program::build_program(engine, topology, config, false, true); layout_optimizer lo(true); @@ -183,10 +183,10 @@ 
TEST(prepare_primitive_fusing, fuse_eltwise_to_fc_dyn_illegal_const) { topology.add(eltwise("eltw", { input_info("fc"), input_info("extra_input") }, eltwise_mode::sum)); topology.add(reorder("reorder", input_info("eltw"), format::bfyx, data_types::f32)); - build_options build_opts; - build_opts.set_option(build_option::optimize_data(true)); - build_opts.set_option(build_option::allow_new_shape_infer(true)); - auto prog = program::build_program(engine, topology, build_opts, false, true); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + auto prog = program::build_program(engine, topology, config, false, true); layout_optimizer lo(true); @@ -237,10 +237,10 @@ TEST(prepare_primitive_fusing, fuse_eltwise_to_fc_dyn_legal_scalar_const_broadca topology.add(eltwise("eltw", { input_info("fc"), input_info("extra_input") }, eltwise_mode::sum)); topology.add(reorder("reorder", input_info("eltw"), format::bfyx, data_types::f32)); - build_options build_opts; - build_opts.set_option(build_option::optimize_data(true)); - build_opts.set_option(build_option::allow_new_shape_infer(true)); - auto prog = program::build_program(engine, topology, build_opts, false, true); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + auto prog = program::build_program(engine, topology, config, false, true); layout_optimizer lo(true); @@ -294,10 +294,10 @@ TEST(prepare_primitive_fusing, fuse_eltwise_to_fc_dyn_illegal_1) { topology.add(activation("act_fc2", input_info("eltw"), activation_func::relu)); topology.add(reorder("reorder", input_info("act_fc2"), format::bfyx, data_types::f32)); - build_options build_opts; - build_opts.set_option(build_option::optimize_data(true)); - build_opts.set_option(build_option::allow_new_shape_infer(true)); - auto prog = program::build_program(engine, topology, build_opts, false, true); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + auto prog = program::build_program(engine, topology, config, false, true); layout_optimizer lo(true); @@ -363,10 +363,10 @@ TEST(prepare_primitive_fusing, fuse_eltwise_to_fc_dyn_illegal_2) { topology.add(activation("act_fc3", input_info("eltw"), activation_func::relu)); topology.add(reorder("reorder", input_info("act_fc3"), format::bfyx, data_types::f32)); - build_options build_opts; - build_opts.set_option(build_option::optimize_data(true)); - build_opts.set_option(build_option::allow_new_shape_infer(true)); - auto prog = program::build_program(engine, topology, build_opts, false, true); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + auto prog = program::build_program(engine, topology, config, false, true); layout_optimizer lo(true); diff --git a/src/plugins/intel_gpu/tests/passes/reorder_inputs_test.cpp b/src/plugins/intel_gpu/tests/passes/reorder_inputs_test.cpp index b28be194673..f0203532277 100644 --- a/src/plugins/intel_gpu/tests/passes/reorder_inputs_test.cpp +++ b/src/plugins/intel_gpu/tests/passes/reorder_inputs_test.cpp @@ -42,9 +42,9 @@ TEST(reorder_inputs, propagation) { topology.add(pooling("pool", input_info("conv1"), pooling_mode::max, { 1, 1 }, { 1, 1 })); topology.add(convolution("conv2", input_info("pool"), { "weights" })); - 
build_options build_opts; - build_opts.set_option(build_option::optimize_data(true)); - auto prog = program::build_program(engine, topology, build_opts); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + auto prog = program::build_program(engine, topology, config); auto prog_impl = prog.get(); @@ -77,12 +77,12 @@ TEST(reorder_inputs, impl_forcing_basic_format) { topology.add(input_layout("input", input->get_layout())); topology.add(pooling("pool", input_info("input"), pooling_mode::max, { 1, 2 }, { 1, 2 })); - implementation_desc pool_impl = { format::yxfb, "" }; + ov::intel_gpu::ImplementationDesc pool_impl = { format::yxfb, "" }; - build_options build_opts; - build_opts.set_option(build_option::force_implementations({ {"pool", pool_impl} })); + ExecutionConfig config; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"pool", pool_impl} })); - network network(engine, topology, build_opts); + network network(engine, topology, config); set_values(input, { 1.f, 2.f, 3.f, 2.f, 7.f, 3.f, -2.f, -1.f }); @@ -115,12 +115,12 @@ TEST(reorder_inputs, impl_forcing_not_existing) { topology.add(input_layout("input", input->get_layout())); topology.add(pooling("pool", input_info("input"), pooling_mode::max, { 1, 2 }, { 1, 2 })); - implementation_desc pool_impl = { format::any, "NOT_EXISTING" }; + ov::intel_gpu::ImplementationDesc pool_impl = { format::any, "NOT_EXISTING" }; - build_options build_opts; - build_opts.set_option(build_option::force_implementations({ {"pool", pool_impl} })); + ExecutionConfig config; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"pool", pool_impl} })); - ASSERT_ANY_THROW(network network(engine, topology, build_opts)); + ASSERT_ANY_THROW(network network(engine, topology, config)); } TEST(reorder_inputs, impl_forcing_basic_format_kernel) { @@ -131,12 +131,12 @@ TEST(reorder_inputs, impl_forcing_basic_format_kernel) { topology.add(input_layout("input", input->get_layout())); topology.add(activation("actv", input_info("input"), activation_func::relu)); - implementation_desc actv_impl = { format::yxfb, "activation_ref" }; + ov::intel_gpu::ImplementationDesc actv_impl = { format::yxfb, "activation_ref" }; - build_options build_opts; - build_opts.set_option(build_option::force_implementations({ {"actv", actv_impl} })); + ExecutionConfig config; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"actv", actv_impl} })); - network network(engine, topology, build_opts); + network network(engine, topology, config); set_values(input, { -1.f, 2.f, -3.f, 0.5f, 7.f, 3.f, -2.f, -1.f }); @@ -189,10 +189,10 @@ TEST(reorder_inputs, impl_forcing_basic_format_kernel) { // for (auto impl : possible_impls) { // SCOPED_TRACE(to_string(impl)); // -// build_options build_opts; -// build_opts.set_option(build_option::force_implementations({ {"conv", impl} })); +// ExecutionConfig config; +// config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv", impl} })); // -// network network(engine, topology, build_opts); +// network network(engine, topology, config); // // network.set_input_data("input", input); // network.execute(); diff --git a/src/plugins/intel_gpu/tests/passes/select_preferred_formats_test.cpp b/src/plugins/intel_gpu/tests/passes/select_preferred_formats_test.cpp index 6d214da4ba1..f1ead627667 100644 --- a/src/plugins/intel_gpu/tests/passes/select_preferred_formats_test.cpp +++ 
b/src/plugins/intel_gpu/tests/passes/select_preferred_formats_test.cpp @@ -33,13 +33,13 @@ TEST(test_select_preferred_formats, setting_target_conv_format) { topology.add(reorder("reorder", input_info("input"), format::b_fs_yx_fsv16, data_types::f16)); topology.add(convolution("conv1", input_info("reorder"), { "weights" })); - build_options build; - build.set_option(build_option::allow_new_shape_infer(true)); - implementation_desc impl = { format::b_fs_yx_fsv16, std::string(""), impl_types::onednn }; - build.set_option(build_option::force_implementations({ {"conv1", impl} })); + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, std::string(""), impl_types::onednn }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv1", impl} })); layout_optimizer lo(true); - auto prog = program::build_program(engine, topology, build, false, true); + auto prog = program::build_program(engine, topology, config, false, true); // It initializes output_layout. // It's necessary because this test runs select_preferred_formats pass alone. @@ -62,7 +62,7 @@ TEST(test_select_preferred_formats, setting_target_conv_format) { ASSERT_EQ(output_fmt, format::b_fs_yx_fsv16); } else { ASSERT_EQ(input_fmt, format::any); - ASSERT_EQ(output_fmt, format::any); + ASSERT_EQ(output_fmt, format::any); } } } diff --git a/src/plugins/intel_gpu/tests/passes/test_module_fusing_reorder.cpp b/src/plugins/intel_gpu/tests/passes/test_module_fusing_reorder.cpp index 8feaca7e09d..b775a92c38d 100644 --- a/src/plugins/intel_gpu/tests/passes/test_module_fusing_reorder.cpp +++ b/src/plugins/intel_gpu/tests/passes/test_module_fusing_reorder.cpp @@ -48,13 +48,8 @@ static void setting_onednn_conv(program::ptr prog, layout_optimizer& lo, const p // To test removal of reorder for mixed precision of Onednn conv kernel (conv: u8->fp32) TEST(test_can_fuse_reorder, reorder_for_mixed_type_convolution_fsv32_onednn) { - build_options build_opt; topology topology; -#ifdef ENABLE_ONEDNN_FOR_GPU - auto& engine = get_onednn_test_engine(); -#else auto& engine = get_test_engine(); -#endif layout reorder_layout(data_types::u8, format::b_fs_yx_fsv32, {1, 32, 2, 2}, padding({0, }, 0)); auto input = engine.allocate_memory({ data_types::u8, format::bfyx, {1, 3, 2, 2} }); @@ -68,7 +63,8 @@ TEST(test_can_fuse_reorder, reorder_for_mixed_type_convolution_fsv32_onednn) topology.add(cldnn::convolution("conv", { input_info("reorder_input") }, { "weights" }, { "bias"}, 1, {1, 1}, {0, 0}, {1, 1}, {1, 32, 2, 2}, data_types::f32, false)); topology.add(reorder("reorder_conv", input_info("conv"), reorder_layout)); - program::ptr prog = program::build_program(engine, topology, build_opt, false, true); + ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::in_order)); + program::ptr prog = program::build_program(engine, topology, cfg, false, true); layout_optimizer lo = layout_optimizer(); lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, true); @@ -89,13 +85,8 @@ TEST(test_can_fuse_reorder, reorder_for_mixed_type_convolution_fsv32_onednn) // To test mixed precision of Cldnn conv kernel (conv: u8->fp32) TEST(test_can_fuse_reorder, reorder_for_mixed_type_convolution_fsv32_cldnn) { - build_options build_opt; topology topology; -#ifdef ENABLE_ONEDNN_FOR_GPU - auto& engine = get_onednn_test_engine(); -#else auto& engine = get_test_engine(); -#endif layout 
reorder_layout(data_types::u8, format::b_fs_yx_fsv32, {1, 32, 2, 2}, padding({0, }, 0)); auto input = engine.allocate_memory({ data_types::u8, format::bfyx, {1, 3, 2, 2} }); @@ -109,7 +100,8 @@ TEST(test_can_fuse_reorder, reorder_for_mixed_type_convolution_fsv32_cldnn) topology.add(cldnn::convolution("conv", { input_info("reorder_input") }, { "weights" }, { "bias"}, 1, {1, 1}, {0, 0}, {1, 1}, {1, 32, 2, 2}, data_types::f32, false)); topology.add(reorder("reorder_conv", input_info("conv"), reorder_layout)); - program::ptr prog = program::build_program(engine, topology, build_opt, false, true); + ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::in_order)); + program::ptr prog = program::build_program(engine, topology, cfg, false, true); layout_optimizer lo = layout_optimizer(); lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, false); @@ -148,11 +140,7 @@ struct reorder_test_param { template class ReorderTest : public ::testing::TestWithParam { public: -#ifdef ENABLE_ONEDNN_FOR_GPU - cldnn::engine& engine = get_onednn_test_engine(); -#else cldnn::engine& engine = get_test_engine(); -#endif layout get_input_layout(T& p) { auto pad = p.pad; @@ -169,7 +157,6 @@ public: class test_fused_reorder_deep_depth : public ReorderTest {}; TEST_P(test_fused_reorder_deep_depth, no_removal_for_deep_depth_conv) { - build_options build_opt; topology topology; auto p = GetParam(); @@ -185,7 +172,8 @@ TEST_P(test_fused_reorder_deep_depth, no_removal_for_deep_depth_conv) topology.add(cldnn::convolution("conv", { input_info("reorder_input") }, { "weights" })); topology.add(reorder("reorder_conv", input_info("conv"), reorder_layout)); - program::ptr prog = program::build_program(engine, topology, build_opt, false, true); + ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::in_order)); + program::ptr prog = program::build_program(engine, topology, cfg, false, true); layout_optimizer lo = layout_optimizer(); lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, true); setting_node(prog, "conv", conv_layout); @@ -220,7 +208,6 @@ INSTANTIATE_TEST_SUITE_P(testing_deep_depth_conv, test_fused_reorder_deep_depth, class test_can_fuse_reorder_cldnn : public ReorderTest {}; TEST_P(test_can_fuse_reorder_cldnn, reorder_for_firstconv_cldnn) { - build_options build_opt; topology topology; auto p = GetParam(); @@ -236,7 +223,8 @@ TEST_P(test_can_fuse_reorder_cldnn, reorder_for_firstconv_cldnn) topology.add(cldnn::convolution("conv2", { input_info("reorder_input") }, { "weights" }, { "bias"}, 1, {1, 1}, {0, 0}, {1, 1}, p.out_shape, p.input_data_type, false)); topology.add(reorder("reorder_conv", input_info("conv2"), reorder_layout)); - program::ptr prog = program::build_program(engine, topology, build_opt, false, true); + ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + program::ptr prog = program::build_program(engine, topology, cfg, false, true); layout_optimizer lo = layout_optimizer(); lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, false); @@ -266,7 +254,6 @@ INSTANTIATE_TEST_SUITE_P(testing_can_fuse_reorder_first_conv, test_can_fuse_reor class test_can_fuse_reorder_onednn : public ReorderTest {}; TEST_P(test_can_fuse_reorder_onednn, reorder_for_firstconv_onednn) { - build_options build_opt; topology topology; auto p = GetParam(); @@ -282,7 +269,8 @@ TEST_P(test_can_fuse_reorder_onednn, reorder_for_firstconv_onednn) 
topology.add(cldnn::convolution("conv", { input_info("reorder_input") }, { "weights" })); topology.add(reorder("reorder_result", input_info("conv"), reorder_layout)); - program::ptr prog = program::build_program(engine, topology, build_opt, false, true); + ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::in_order)); + program::ptr prog = program::build_program(engine, topology, cfg, false, true); layout_optimizer lo = layout_optimizer(); lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, true); setting_node(prog, "conv", conv_layout); @@ -311,7 +299,6 @@ INSTANTIATE_TEST_SUITE_P(testing_can_fuse_reorder_first_conv, test_can_fuse_reor class can_fuse_reorder : public ::testing::TestWithParam> {}; TEST_P(can_fuse_reorder, surface_input_reorder) { - build_options build_opt; topology topology; auto& engine = get_test_engine(); @@ -339,7 +326,8 @@ TEST_P(can_fuse_reorder, surface_input_reorder) { topology.add(input_layout_prim, weights_data_prim, surface_input_reorder_prim, conv_input_reorder_prim, conv_prim); - program::ptr prog = program::build_program(engine, topology, build_opt, false, true); + ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + program::ptr prog = program::build_program(engine, topology, cfg, false, true); layout_optimizer lo = layout_optimizer(); program_wrapper::apply_opt_pass(*prog, lo); @@ -362,7 +350,6 @@ TEST_P(can_fuse_reorder, surface_input_reorder) { } TEST_P(can_fuse_reorder, surface_input_reorder_batched) { - build_options build_opt; topology topology; auto& engine = get_test_engine(); @@ -397,7 +384,8 @@ TEST_P(can_fuse_reorder, surface_input_reorder_batched) { surface_input_reorder_prim1, surface_input_reorder_prim2, conv_input_reorder_prim, concat, conv_prim); - program::ptr prog = program::build_program(engine, topology, build_opt, false, true); + ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + program::ptr prog = program::build_program(engine, topology, cfg, false, true); layout_optimizer lo = layout_optimizer(); program_wrapper::apply_opt_pass(*prog, lo); @@ -433,9 +421,7 @@ struct onednn_layout_errata_test_param { // Errata cases for onednn convolution layout: both bfyx and byxf are acceptable class test_can_fuse_reorder_onednn_errata : public ReorderTest {}; -TEST_P(test_can_fuse_reorder_onednn_errata, errata_case_for_conv) -{ - build_options build_opt; +TEST_P(test_can_fuse_reorder_onednn_errata, errata_case_for_conv) { topology topology; auto p = GetParam(); if (!engine.get_device_info().supports_immad) @@ -451,7 +437,8 @@ TEST_P(test_can_fuse_reorder_onednn_errata, errata_case_for_conv) topology.add(convolution("conv", { input_info("reorder_conv") }, { "weights" })); topology.add(reorder("reorder_result", input_info("conv"), p.conv_layout)); - program::ptr prog = program::build_program(engine, topology, build_opt, false, true); + ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::in_order)); + program::ptr prog = program::build_program(engine, topology, cfg, false, true); layout_optimizer lo = layout_optimizer(); lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, true); setting_onednn_conv(prog, lo, "conv", p.conv_layout); diff --git a/src/plugins/intel_gpu/tests/shape_infer/broadcast_si_test.cpp b/src/plugins/intel_gpu/tests/shape_infer/broadcast_si_test.cpp index 0187666a197..bd41b1cf042 100644 --- a/src/plugins/intel_gpu/tests/shape_infer/broadcast_si_test.cpp +++ 
b/src/plugins/intel_gpu/tests/shape_infer/broadcast_si_test.cpp @@ -104,13 +104,14 @@ TEST_P(broadcast_test_two_inputs_blocked_format, shape_infer) { broadcast("output", input_info("data"), input_info("target_shape"), p.axes_mapping_data, p.mode) ); - build_options options; - options.set_option(build_option::optimize_data(true)); - options.set_option(build_option::allow_new_shape_infer(true)); + ExecutionConfig config { + ov::intel_gpu::optimize_data(true), + ov::intel_gpu::allow_new_shape_infer(true) + }; std::vector input_data(p.data_layout.get_linear_size(), 1); - network network(engine, topology, options); + network network(engine, topology, config); set_values(data_mem, input_data); set_values(in1_mem, p.target_shape_data); diff --git a/src/plugins/intel_gpu/tests/test_cases/activation_simple_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/activation_simple_gpu_test.cpp index 0162e658808..bb7890e372b 100644 --- a/src/plugins/intel_gpu/tests/test_cases/activation_simple_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/activation_simple_gpu_test.cpp @@ -1617,15 +1617,14 @@ struct activation_random_test : testing::TestWithParam{"activation"})}; std::shared_ptr net; if (is_caching_test) { membuf mem_buf; { - cldnn::network _network(engine, topo, build_opts); + cldnn::network _network(engine, topo, config); std::ostream out_mem(&mem_buf); BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem); _network.save(ob); @@ -1633,10 +1632,10 @@ struct activation_random_test : testing::TestWithParam(ib, get_test_stream_ptr(), engine); + net = std::make_shared(ib, config, get_test_stream_ptr(), engine); } } else { - net = std::make_shared(engine, topo, build_opts); + net = std::make_shared(engine, topo, config); } net->set_input_data("in", in_mem); @@ -1654,14 +1653,14 @@ struct activation_random_test : testing::TestWithParam{"activation_blocked", "res_to_input_format"}), + ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"activation_blocked", {input_format, "activation_ref"}}}) + }; - network net_opt(engine, topo_opt, build_opts_opt); + network net_opt(engine, topo_opt, config_opt); // Use in_mem from ref network net_opt.set_input_data("in", in_mem); diff --git a/src/plugins/intel_gpu/tests/test_cases/add_reorders_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/add_reorders_gpu_test.cpp index a3e846b802b..8440cbfb9a6 100644 --- a/src/plugins/intel_gpu/tests/test_cases/add_reorders_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/add_reorders_gpu_test.cpp @@ -24,8 +24,8 @@ add_reorders optimization pass. 
//concatenation of incompatible convolutions TEST(add_reorders_gpu, two_convolutions_and_concatenation) { auto& engine = get_test_engine(); - build_options build_opt; - build_opt.set_option(build_option::optimize_data(false)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(false)); auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 2, 2 } }); auto weights1 = engine.allocate_memory({ data_types::f32, format::yxio,{ 1, 1, 1, 2 } }); @@ -46,7 +46,7 @@ TEST(add_reorders_gpu, two_convolutions_and_concatenation) { topology.add(cldnn::concatenation("concat", { input_info("conv1"), input_info("conv2") }, 1)); - network network(engine, topology, build_opt); + network network(engine, topology, config); network.set_input_data("input", input); //concatenation accepts inputs in different formats, so no reorders should be added here diff --git a/src/plugins/intel_gpu/tests/test_cases/arg_max_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/arg_max_gpu_test.cpp index ebb03631bc0..6b6622bbd03 100644 --- a/src/plugins/intel_gpu/tests/test_cases/arg_max_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/arg_max_gpu_test.cpp @@ -539,11 +539,11 @@ TEST(top_k_layer_tests, multiple_outputs) { set_values(input, input_vec); - build_options bo; - bo.set_option(build_option::optimize_data(true)); - bo.set_option(build_option::allow_new_shape_infer(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); - network network(engine, topology, bo); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/barriers_test.cpp b/src/plugins/intel_gpu/tests/test_cases/barriers_test.cpp index 4fc9a0c2d14..74d85431740 100644 --- a/src/plugins/intel_gpu/tests/test_cases/barriers_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/barriers_test.cpp @@ -11,10 +11,8 @@ using namespace cldnn; using namespace ::tests; -TEST(DISABLED_oooq_test, simple) -{ - engine_configuration cfg{ false, queue_types::out_of_order }; - auto eng = engine::create(engine_types::ocl, runtime_types::ocl, cfg); +TEST(DISABLED_oooq_test, simple) { + auto eng = engine::create(engine_types::ocl, runtime_types::ocl); auto in_layout = layout{ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }; auto concat_layout = layout{ data_types::f32, format::bfyx, { 1, 1, 1, 2 } }; @@ -43,8 +41,8 @@ TEST(DISABLED_oooq_test, simple) tpl.add(reorder("r8", input_info("c6"), concat_layout, std::vector{ 8 })); tpl.add(concatenation("c9", { input_info("r7"), input_info("r8") }, 2)); - build_options options; - network net{ *eng, tpl, options }; + ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + network net{ *eng, tpl, cfg }; net.set_input_data("in", input_mem); auto output = net.execute().at("c9").get_memory(); diff --git a/src/plugins/intel_gpu/tests/test_cases/binary_convolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/binary_convolution_gpu_test.cpp index 6b2c91b4bda..5c9f0044150 100644 --- a/src/plugins/intel_gpu/tests/test_cases/binary_convolution_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/binary_convolution_gpu_test.cpp @@ -185,8 +185,8 @@ TEST_P(binary_convolution_test, conv) { if(engine.get_device_info().supports_immad) return; - cldnn::build_options options; - options.set_option(cldnn::build_option::optimize_data(true)); + 
ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); topology topology_bin; std::string weights_suffix = "_w_"; @@ -235,7 +235,7 @@ TEST_P(binary_convolution_test, conv) { if (p.is_caching_test) { membuf mem_buf; { - cldnn::network _network(engine, topology_bin, options); + cldnn::network _network(engine, topology_bin, config); std::ostream out_mem(&mem_buf); BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem); _network.save(ob); @@ -246,7 +246,7 @@ TEST_P(binary_convolution_test, conv) { network_bin = std::make_shared(ib, get_test_stream_ptr(), engine); } } else { - network_bin = std::make_shared(engine, topology_bin, options); + network_bin = std::make_shared(engine, topology_bin, config); } network_bin->set_input_data(input_name, input); @@ -399,10 +399,10 @@ TEST(binary_convolution, basic_convolution_1x1_single_packed_channel) { padding{ { 0,0,0,0 }, 0 }) ); - cldnn::build_options options; - options.set_option(cldnn::build_option::optimize_data(true)); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); - network network(engine, topology, options); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -485,10 +485,10 @@ TEST(binary_convolution, basic_convolution_1x1_single_packed_channel_fp16) { padding{ { 0,0,0,0 }, 0 }) ); - cldnn::build_options options; - options.set_option(cldnn::build_option::optimize_data(true)); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); - network network(engine, topology, options); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/broadcast_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/broadcast_gpu_test.cpp index 085bbf86f9f..40efa44eb92 100644 --- a/src/plugins/intel_gpu/tests/test_cases/broadcast_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/broadcast_gpu_test.cpp @@ -137,12 +137,12 @@ void start_broadcast_test_dynamic(format input_format, set_values(target_shape_mem, target_shape_data); } - build_options bo; - bo.set_option(build_option::allow_new_shape_infer(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); set_values(input, input_data); - network network(engine, topology, bo); + network network(engine, topology, config); network.set_input_data("input", input); if (!is_output_static) { network.set_input_data("target_shape", target_shape_mem); diff --git a/src/plugins/intel_gpu/tests/test_cases/cache_test.cpp b/src/plugins/intel_gpu/tests/test_cases/cache_test.cpp index 6bbdfdff957..90c3795a290 100644 --- a/src/plugins/intel_gpu/tests/test_cases/cache_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/cache_test.cpp @@ -167,7 +167,7 @@ class cache_test_helper { public: cache_test_helper(cldnn::engine& engine, cache_version v) : _engine(engine) - , _mode(cldnn::tuning_mode::tuning_disabled) + , _mode(ov::intel_gpu::TuningMode::tuning_disabled) , cache_filename(get_temporary_cache_file()) { auto cache = get_cache_version(v); @@ -181,7 +181,7 @@ public: remove(cache_filename); } - cache_test_helper& with_mode(cldnn::tuning_mode mode) { + cache_test_helper& with_mode(ov::intel_gpu::TuningMode mode) { _mode = mode; return *this; } @@ -210,14 +210,14 @@ public: cldnn::convolution("conv", input_info("input"), { "weights" }) ); - auto tune_conf = 
cldnn::tuning_config_options(); + ov::intel_gpu::TuningConfig tune_conf; tune_conf.cache_file_path = cache_filename; tune_conf.mode = _mode; - auto build_opts = cldnn::build_options( - cldnn::build_option::tuning_config(tune_conf), - cldnn::build_option::optimize_data(true) - ); - cldnn::network network(_engine, topology, build_opts); + ExecutionConfig config{ + ov::intel_gpu::tuning_config(tune_conf), + ov::intel_gpu::optimize_data(true) + }; + cldnn::network network(_engine, topology, config); auto in_mem = _engine.allocate_memory(cldnn::layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 16, 3, 3 })); network.set_input_data("input", in_mem); network.execute(); @@ -258,7 +258,7 @@ private: cldnn::engine& _engine; - cldnn::tuning_mode _mode; + ov::intel_gpu::TuningMode _mode; std::string cache_filename; @@ -304,7 +304,7 @@ TEST(cache_test, no_cache_baseline) { auto& engine = tests::get_test_engine(); auto helper = cache_test_helper(engine, cache_version::version_2); - helper.with_mode(cldnn::tuning_mode::tuning_disabled) + helper.with_mode(ov::intel_gpu::TuningMode::tuning_disabled) .expect_implementation_not(reference_impl_name) .test(); } @@ -314,7 +314,7 @@ TEST_P(cache_version_test, use_only) { auto& engine = tests::get_test_engine(); cache_test_helper helper(engine, version); - helper.with_mode(cldnn::tuning_mode::tuning_use_cache) + helper.with_mode(ov::intel_gpu::TuningMode::tuning_use_cache) .expect_implementation(reference_impl_name) .expect_cache(version) .test(); @@ -330,7 +330,7 @@ TEST_P(cache_version_test, update) { auto& engine = tests::get_test_engine(); cache_test_helper helper(engine, version); - helper.with_mode(cldnn::tuning_mode::tuning_use_and_update) + helper.with_mode(ov::intel_gpu::TuningMode::tuning_use_and_update) .expect_implementation(reference_impl_name) .expect_cache(ex_version) .test(); @@ -346,7 +346,7 @@ TEST(cache_test, remove_invalid) { auto& engine = tests::get_test_engine(); cache_test_helper helper(engine, cache_version::version_2_invalid); - helper.with_mode(cldnn::tuning_mode::tuning_use_and_update) + helper.with_mode(ov::intel_gpu::TuningMode::tuning_use_and_update) .expect_implementation_not(reference_impl_name) .expect_cache(cache_version::version_2_empty) .test(); diff --git a/src/plugins/intel_gpu/tests/test_cases/cl_mem_input_test.cpp b/src/plugins/intel_gpu/tests/test_cases/cl_mem_input_test.cpp index fb1612cb04f..2df2d8620cb 100644 --- a/src/plugins/intel_gpu/tests/test_cases/cl_mem_input_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/cl_mem_input_test.cpp @@ -291,7 +291,7 @@ TEST(cl_mem_check, check_write_access_type) { } auto engine = engine::create(engine_types::ocl, runtime_types::ocl, device); - auto stream = engine->create_stream(); + auto stream = engine->create_stream({}); size_t values_count = 100; size_t values_bytes_count = values_count * sizeof(float); @@ -328,7 +328,7 @@ TEST(cl_mem_check, check_read_access_type) { } auto engine = engine::create(engine_types::ocl, runtime_types::ocl, device); - auto stream = engine->create_stream(); + auto stream = engine->create_stream({}); size_t values_count = 100; size_t values_bytes_count = values_count * sizeof(float); diff --git a/src/plugins/intel_gpu/tests/test_cases/command_queue_test.cpp b/src/plugins/intel_gpu/tests/test_cases/command_queue_test.cpp index 6344caef051..87a8dc56a63 100644 --- a/src/plugins/intel_gpu/tests/test_cases/command_queue_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/command_queue_test.cpp @@ -13,7 +13,7 @@ using namespace std; namespace { 
// Run some topology too see if command queue does work correctly // Coppied from arg_max_gpu.base test. -void exexute_network(cldnn::engine& engine, bool is_caching_test=false) { +void exexute_network(cldnn::engine& engine, const ExecutionConfig& cfg, bool is_caching_test=false) { // Input : 2x4x2x2 static const int32_t x_size = 2, y_size = 2, feature_num = 4, batch_num = 2; const int top_k = 2; @@ -41,7 +41,7 @@ void exexute_network(cldnn::engine& engine, bool is_caching_test=false) { if (is_caching_test) { membuf mem_buf; { - cldnn::network _network(engine, topology); + cldnn::network _network(engine, topology, cfg); std::ostream out_mem(&mem_buf); BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem); _network.save(ob); @@ -49,10 +49,10 @@ void exexute_network(cldnn::engine& engine, bool is_caching_test=false) { { std::istream in_mem(&mem_buf); BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine); - network = std::make_shared(ib, get_test_stream_ptr(), engine); + network = std::make_shared(ib, cfg, get_test_stream_ptr(), engine); } } else { - network = std::make_shared(engine, topology); + network = std::make_shared(engine, topology, cfg); } network->set_input_data("input", input); @@ -74,49 +74,31 @@ void exexute_network(cldnn::engine& engine, bool is_caching_test=false) { } // namespace TEST(command_queue_test, test_priority_hints) { - engine_configuration configuration = - engine_configuration( - false, // profiling - queue_types::out_of_order, - "", // sources_dumps_dir - priority_mode_types::low, - throttle_mode_types::disabled); - auto engine = engine::create(engine_types::ocl, runtime_types::ocl, configuration); - exexute_network(*engine); + ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::out_of_order), + ov::intel_gpu::hint::queue_priority(ov::hint::Priority::LOW)}; + auto engine = engine::create(engine_types::ocl, runtime_types::ocl); + exexute_network(*engine, cfg); } TEST(command_queue_test, test_throttle_hints) { - engine_configuration configuration = - engine_configuration( - false, // profiling - queue_types::out_of_order, - "", // sources_dumps_dir - priority_mode_types::disabled, - throttle_mode_types::high); - auto engine = engine::create(engine_types::ocl, runtime_types::ocl, configuration); - exexute_network(*engine); + ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::out_of_order), + ov::intel_gpu::hint::queue_throttle(ov::intel_gpu::hint::ThrottleLevel::HIGH)}; + auto engine = engine::create(engine_types::ocl, runtime_types::ocl); + exexute_network(*engine, cfg); } TEST(command_queue_test, test_priority_and_throttle_hints) { - engine_configuration configuration = - engine_configuration( - false, // profiling - queue_types::out_of_order, - "", // sources_dumps_dir - priority_mode_types::high, - throttle_mode_types::low); - auto engine = engine::create(engine_types::ocl, runtime_types::ocl, configuration); - exexute_network(*engine); + ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::out_of_order), + ov::intel_gpu::hint::queue_priority(ov::hint::Priority::HIGH), + ov::intel_gpu::hint::queue_throttle(ov::intel_gpu::hint::ThrottleLevel::LOW)}; + auto engine = engine::create(engine_types::ocl, runtime_types::ocl); + exexute_network(*engine, cfg); } TEST(export_import_command_queue_test, test_priority_and_throttle_hints) { - engine_configuration configuration = - engine_configuration( - false, // profiling - queue_types::out_of_order, - "", // sources_dumps_dir - priority_mode_types::high, - throttle_mode_types::low); - auto engine = 
engine::create(engine_types::ocl, runtime_types::ocl, configuration); - exexute_network(*engine, true); + ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::out_of_order), + ov::intel_gpu::hint::queue_priority(ov::hint::Priority::HIGH), + ov::intel_gpu::hint::queue_throttle(ov::intel_gpu::hint::ThrottleLevel::LOW)}; + auto engine = engine::create(engine_types::ocl, runtime_types::ocl); + exexute_network(*engine, cfg, true); } diff --git a/src/plugins/intel_gpu/tests/test_cases/concatenation_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/concatenation_gpu_test.cpp index 5712a469683..9dedaf6ac8d 100644 --- a/src/plugins/intel_gpu/tests/test_cases/concatenation_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/concatenation_gpu_test.cpp @@ -205,9 +205,9 @@ TEST(concat_gpu, i8_optimization_with_pool) { data_types::i8, padding{{0, 0, 0, 0}, 0}), reorder("reorder", input_info("concat"), reorder_layout)); - cldnn::build_options options; - options.set_option(cldnn::build_option::optimize_data(true)); - network network(engine, topology, options); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input0", input0); network.set_input_data("input1", input1); auto outputs = network.execute(); @@ -307,9 +307,9 @@ TEST(concat_gpu, i8_optimization_with_conv) { data("weights", weights), convolution("conv", input_info("concat"), { "weights" }, { 2, 1 }), reorder("output", input_info("conv"), reorder_layout)); - cldnn::build_options options; - options.set_option(cldnn::build_option::optimize_data(true)); - network network(engine, topology, options); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input0", input0); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -408,9 +408,9 @@ TEST(concat_gpu, i8_optimization_with_pool_conv) { data("weights", weights), convolution("conv", input_info("concat"), {"weights"}, {1, 1}, {0, 1}), reorder("output", input_info("conv"), reorder_layout) ); - cldnn::build_options options; - options.set_option(cldnn::build_option::optimize_data(true)); - network network(engine, topology, options); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input0", input0); network.set_input_data("input1", input1); auto outputs = network.execute(); @@ -585,9 +585,9 @@ public: topology.add(concatenation("concat", input_ids, 1)); - build_options options; - options.set_option(build_option::optimize_data(true)); - network network(engine, topology, options); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); for (size_t i = 0; i < in_features.size(); i++) { network.set_input_data(input_ids[i].pid, in_memory[i]); @@ -675,9 +675,9 @@ public: topology.add(concatenation("concat", input_ids, 3)); - build_options options; - options.set_option(build_option::optimize_data(true)); - network network(engine, topology, options); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); for (size_t i = 0; i < input_x.size(); i++) { network.set_input_data(input_ids[i].pid, in_memory[i]); @@ -839,11 +839,11 @@ public: topology.add(data("weights", 
weights_mem)); topology.add(convolution("conv", input_info("concat"), { "weights" })); - build_options options; - options.set_option(build_option::optimize_data(true)); - auto conv_forcing = implementation_desc{ fmt, std::string() }; - options.set_option(build_option::force_implementations({ {primitive_id("conv"), conv_forcing} })); - network network(engine, topology, options); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + auto conv_forcing = ov::intel_gpu::ImplementationDesc{ fmt, std::string() }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {primitive_id("conv"), conv_forcing} })); + network network(engine, topology, config); for (size_t i = 0; i < in_features.size(); i++) { network.set_input_data(input_ids[i].pid, in_memory[i]); @@ -913,7 +913,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_low_precision, template struct concat_gpu_4d_implicit : public concat_gpu { public: - cldnn::memory::ptr run_concat_network(std::vector>>>> input, format::type fmt, build_options options) { + cldnn::memory::ptr run_concat_network(std::vector>>>> input, format::type fmt, ExecutionConfig config) { auto data_type = type_to_data_type::value; auto& engine = get_test_engine(); const size_t batch_num = testing::get<0>(GetParam()); @@ -986,7 +986,7 @@ public: if (is_caching_test) { membuf mem_buf; { - cldnn::network _network(engine, topology, options); + cldnn::network _network(engine, topology, config); std::ostream out_mem(&mem_buf); BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem); _network.save(ob); @@ -994,10 +994,10 @@ public: { std::istream in_mem(&mem_buf); BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine); - concat_network = std::make_shared(ib, get_test_stream_ptr(), engine); + concat_network = std::make_shared(ib, config, get_test_stream_ptr(), engine); } } else { - concat_network = std::make_shared(engine, topology, options); + concat_network = std::make_shared(engine, topology, config); } for (size_t i = 0; i < in_features.size(); i++) { @@ -1005,7 +1005,7 @@ public: } concat_network->execute(); - bool concat_opt_enabled = options.get()->enabled(); + bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data); bool concat_opt_result = std::static_pointer_cast(concat_network->get_primitive("concat"))->can_be_optimized(); EXPECT_EQ(concat_opt_enabled, concat_opt_result); @@ -1029,15 +1029,15 @@ public: auto input = generate_input(); // implicit concat - build_options options1; - options1.set_option(build_option::optimize_data(true)); - auto out_mem1 = run_concat_network(input, fmt, options1); + ExecutionConfig config1; + config1.set_property(ov::intel_gpu::optimize_data(true)); + auto out_mem1 = run_concat_network(input, fmt, config1); cldnn::mem_lock out_ptr1(out_mem1, get_test_stream()); // explicit concat - build_options options2; - options2.set_option(build_option::optimize_data(false)); - auto out_mem2 = run_concat_network(input, fmt, options2); + ExecutionConfig config2; + config2.set_property(ov::intel_gpu::optimize_data(false)); + auto out_mem2 = run_concat_network(input, fmt, config2); cldnn::mem_lock out_ptr2(out_mem2, get_test_stream()); ASSERT_EQ(out_ptr1.size(), out_ptr2.size()); @@ -1078,7 +1078,7 @@ TEST_P(concat_implicit_gpu_4d_i8, input_order_opt_b_fs_yx_fsv32) { #ifdef ENABLE_ONEDNN_FOR_GPU TEST(concat_gpu_onednn, basic_input_types) { - auto& engine = get_onednn_test_engine(); + auto& engine = get_test_engine(); if (!engine.get_device_info().supports_immad) return; @@ -1114,12 
+1114,12 @@ TEST(concat_gpu_onednn, basic_input_types) { padding{ { 0,0,0,0 }, 0 }) ); - build_options options_target; - options_target.set_option(build_option::outputs({ "concat" })); - implementation_desc impl = { format::bfyx, std::string(""), impl_types::onednn }; - options_target.set_option(build_option::force_implementations({ {"concat", impl} })); + ov::intel_gpu::ImplementationDesc impl = { format::bfyx, std::string(""), impl_types::onednn }; - network network(engine, topology, options_target); + ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), + ov::intel_gpu::custom_outputs(std::vector{ "concat" }), + ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"concat", impl} })}; + network network(engine, topology, cfg); network.set_input_data("input0", input0); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -1152,9 +1152,9 @@ TEST(concat_gpu_onednn, basic_input_types) { template struct concat_gpu_4d_implicit_onednn : public concat_gpu { public: - cldnn::memory::ptr run_concat_network(std::vector>>>> input, format::type fmt, build_options options) { + cldnn::memory::ptr run_concat_network(std::vector>>>> input, format::type fmt, ExecutionConfig config) { auto data_type = type_to_data_type::value; - auto& engine = get_onednn_test_engine(); + auto& engine = get_test_engine(); const size_t batch_num = testing::get<0>(GetParam()); const std::vector in_features = testing::get<1>(GetParam()); const size_t input_y = testing::get<2>(GetParam()); @@ -1204,10 +1204,11 @@ public: topology.add(concatenation("concat", pooling_ids, 1)); auto weights_lay = cldnn::layout(data_type, cldnn::format::bfyx, tensor(batch(output_f), feature(output_f))); auto weights_mem = engine.allocate_memory(weights_lay); - weights_mem->fill(get_test_stream()); - get_test_stream().finish(); + auto& stream = get_test_stream(); + weights_mem->fill(stream); + stream.finish(); { - cldnn::mem_lock weights_ptr(weights_mem, get_test_stream()); + cldnn::mem_lock weights_ptr(weights_mem, stream); for (size_t fi = 0; fi < output_f; ++fi) { auto coords = tensor(batch(fi), feature(fi), spatial(0, 0, 0, 0)); auto offset = weights_lay.get_linear_offset(coords); @@ -1219,13 +1220,13 @@ public: topology.add(pooling("pool_final", input_info("conv"), pooling_mode::max, {1, 1}, {1, 1})); topology.add(reorder("reorder", input_info("pool_final"), layout(data_type, format::bfyx, {(int32_t)batch_num, (int32_t)output_f, (int32_t)input_y, (int32_t)input_x}))); - network concat_network(engine, topology, options); + network concat_network(engine, topology, config); for (size_t i = 0; i < in_features.size(); i++) { concat_network.set_input_data(input_ids[i], in_memory[i]); } concat_network.execute(); - bool concat_opt_enabled = options.get()->enabled(); + bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data); bool concat_opt_result = std::static_pointer_cast(concat_network.get_primitive("concat"))->node->can_be_optimized(); EXPECT_EQ(concat_opt_enabled, concat_opt_result); @@ -1246,7 +1247,8 @@ public: } void test(format::type fmt) { - auto& engine = get_onednn_test_engine(); + auto& engine = get_test_engine(); + auto& stream = get_test_stream(); if (!engine.get_device_info().supports_immad) { // This case is only for device that uses onednn. 
return; @@ -1254,18 +1256,21 @@ public: auto input = generate_input(); // implicit concat - build_options options1; - options1.set_option(build_option::optimize_data(true)); - implementation_desc impl = { fmt, std::string(""), impl_types::onednn }; - options1.set_option(build_option::force_implementations({ {"conv", impl} })); - auto out_mem1 = run_concat_network(input, fmt, options1); - cldnn::mem_lock out_ptr1(out_mem1, get_test_stream()); + ExecutionConfig config1; + config1.set_property(ov::intel_gpu::optimize_data(true)); + ov::intel_gpu::ImplementationDesc impl = { fmt, std::string(""), impl_types::onednn }; + config1.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv", impl} })); + config1.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); + + auto out_mem1 = run_concat_network(input, fmt, config1); + cldnn::mem_lock out_ptr1(out_mem1, stream); // explicit concat - build_options options2; - options2.set_option(build_option::optimize_data(false)); - auto out_mem2 = run_concat_network(input, fmt, options2); - cldnn::mem_lock out_ptr2(out_mem2, get_test_stream()); + ExecutionConfig config2; + config2.set_property(ov::intel_gpu::optimize_data(false)); + config2.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); + auto out_mem2 = run_concat_network(input, fmt, config2); + cldnn::mem_lock out_ptr2(out_mem2, stream); ASSERT_EQ(out_ptr1.size(), out_ptr2.size()); size_t diff_count = 0; diff --git a/src/plugins/intel_gpu/tests/test_cases/condition_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/condition_gpu_test.cpp index afb0cc54ef4..ff69d4a482e 100644 --- a/src/plugins/intel_gpu/tests/test_cases/condition_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/condition_gpu_test.cpp @@ -86,8 +86,8 @@ std::pair, std::vector> get_values_to_compare(const cl TEST(DISABLED_condition_gpu, basic_equal_comp) { auto& engine = get_test_engine(); - build_options bs; - bs.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); auto scale_mem = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); @@ -112,7 +112,7 @@ TEST(DISABLED_condition_gpu, basic_equal_comp) { eltwise("output", { input_info("condi"), input_info("scale_data") }, eltwise_mode::prod) ); - network net(engine, topology, bs); + network net(engine, topology, config); set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f }); set_values(scale_mem, { 10.0f }); net.set_input_data("input", input); @@ -138,8 +138,8 @@ TEST(DISABLED_condition_gpu, basic_equal_comp) { TEST(DISABLED_condition_gpu, basic_range_equal_comp) { auto& engine = get_test_engine(); - build_options bs; - bs.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); auto input0 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); @@ -186,7 +186,7 @@ TEST(DISABLED_condition_gpu, basic_range_equal_comp) { set_values(input0, input0_data); set_values(input1, input1_data); - network net(engine, topology, bs); + network net(engine, topology, config); net.set_input_data("input0", input0); net.set_input_data("input1", input1); @@ -211,8 +211,8 @@ TEST(DISABLED_condition_gpu, 
basic_range_equal_comp) { TEST(DISABLED_condition_gpu, generic_test_true_false) { auto& engine = get_test_engine(); - build_options bs; - bs.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 5, 2, 5, 1 } }); std::vector input_data(50); std::iota(input_data.begin(), input_data.end(), 0.0f); @@ -281,7 +281,7 @@ TEST(DISABLED_condition_gpu, generic_test_true_false) { ); set_values(input, input_data); - network net(engine, topology, bs); + network net(engine, topology, config); net.set_input_data("input", input); decltype(net.execute()) outputs; @@ -320,8 +320,8 @@ TEST(DISABLED_condition_gpu, basic_stacked_ifs) { */ auto& engine = get_test_engine(); - build_options bs; - bs.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); auto compare2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 1 } }); @@ -367,7 +367,7 @@ TEST(DISABLED_condition_gpu, basic_stacked_ifs) { set_values(compare, compare_data); set_values(compare2, compare_2_data); - network net(engine, topology, bs); + network net(engine, topology, config); net.set_input_data("input", input); net.set_input_data("compare", compare); net.set_input_data("compare2", compare2); @@ -390,8 +390,8 @@ TEST(DISABLED_condition_gpu, basic_nested_ifs) { */ auto& engine = get_test_engine(); - build_options bs; - bs.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); auto compare2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 1 } }); @@ -460,7 +460,7 @@ TEST(DISABLED_condition_gpu, basic_nested_ifs) { set_values(compare, compare_data); set_values(compare2, compare_2_data); - network net(engine, topology, bs); + network net(engine, topology, config); net.set_input_data("input", input); net.set_input_data("compare", compare); net.set_input_data("compare2", compare2); @@ -472,8 +472,8 @@ TEST(DISABLED_condition_gpu, basic_nested_ifs) { TEST(DISABLED_condition_gpu, negative_compare_wrong_layout) { auto& engine = get_test_engine(); - build_options bs; - bs.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 5, 1 } }); @@ -491,13 +491,13 @@ TEST(DISABLED_condition_gpu, negative_compare_wrong_layout) { condition("condi", input_info("input"), branch_true, branch_false, "compare", cond_functions::EQUAL) ); - EXPECT_ANY_THROW(network net(engine, topology, bs);); + EXPECT_ANY_THROW(network net(engine, topology, config);); } TEST(DISABLED_condition_gpu, negative_too_big_offset) { auto& engine = get_test_engine(); - build_options bs; - bs.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, 
format::bfyx,{ 1, 1, 4, 1 } }); auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 3, 1 } }); @@ -515,13 +515,13 @@ TEST(DISABLED_condition_gpu, negative_too_big_offset) { condition("condi", input_info("input"), branch_true, branch_false, "compare", cond_functions::EQUAL, {1, 1, 2, 1}) ); - EXPECT_ANY_THROW(network net(engine, topology, bs);); + EXPECT_ANY_THROW(network net(engine, topology, config);); } TEST(DISABLED_condition_gpu, negative_not_same_layouts) { auto& engine = get_test_engine(); - build_options bs; - bs.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); @@ -546,13 +546,13 @@ TEST(DISABLED_condition_gpu, negative_not_same_layouts) { condition("condi", input_info("input"), branch_true, branch_false, "compare", cond_functions::EQUAL) ); - EXPECT_ANY_THROW(network net(engine, topology, bs);); + EXPECT_ANY_THROW(network net(engine, topology, config);); } TEST(DISABLED_condition_gpu, negative_same_names_within_different_networks) { auto& engine = get_test_engine(); - build_options bs; - bs.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); @@ -580,5 +580,5 @@ TEST(DISABLED_condition_gpu, negative_same_names_within_different_networks) { pooling("pooling_check_name", input_info("condi"), cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 }) ); - EXPECT_ANY_THROW(network net(engine, topology, bs);); + EXPECT_ANY_THROW(network net(engine, topology, config);); } diff --git a/src/plugins/intel_gpu/tests/test_cases/convolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/convolution_gpu_test.cpp index b1bd9eabc57..06c2196019b 100644 --- a/src/plugins/intel_gpu/tests/test_cases/convolution_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/convolution_gpu_test.cpp @@ -1182,9 +1182,9 @@ TEST(convolution_f32_fw_gpu, three_convolutions_same_weights) { convolution("conv3", input_info("conv2"), { "weights" }) ); - cldnn::build_options options; - options.set_option(cldnn::build_option::optimize_data(true)); - network network(engine, topology, options); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1311,9 +1311,9 @@ TEST(convolution_f32_fw_gpu, basic_convolution_bfyx_weights_as_input_layout) { input_layout("weights", weights->get_layout()), input_layout("biases", biases->get_layout()), convolution("conv", input_info("input"), { "weights" }, { "biases" }, { 2, 1 }, { 0, 0 })); - cldnn::build_options options; - options.set_option(cldnn::build_option::optimize_data(true)); - network network(engine, topology, options); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input); network.set_input_data("weights", weights); network.set_input_data("biases", biases); @@ -1372,9 +1372,9 @@ TEST(convolution_f32_fw_gpu, 
basic_convolution_bfyx_weights_as_input_layout_non_ input_layout("weights", weights->get_layout()), data("biases", biases), convolution("conv", input_info("input"), { "weights" }, { "biases" }, { 2, 1 }, { 0, 0 })); - cldnn::build_options options; - options.set_option(cldnn::build_option::optimize_data(false)); - network network(engine, topology, options, true); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(false)); + network network(engine, topology, config, true); network.set_input_data("input", input); network.set_input_data("weights", weights); auto outputs = network.execute(); @@ -3752,9 +3752,9 @@ TEST(convolution_gpu, DISABLED_two_1x1_kernels_after_each_other) { conv_2 ); - build_options bo; - bo.set_option(build_option::optimize_data(true)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -4006,8 +4006,8 @@ TEST(convolution_f32_fw_gpu, byte_activation) { { 0, 0, 0 } } }; - build_options opts; - opts.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); set_values(input, { 1, 2, -3, 4, -5, 2, -2, 3, -4, 6, @@ -4017,7 +4017,7 @@ TEST(convolution_f32_fw_gpu, byte_activation) { topology topology( input_layout("input", input->get_layout())); add_primitives(engine, topology); - network network(engine, topology, opts); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -4081,9 +4081,9 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_symmetric) { convolution("conv", input_info("input"), { "weights" }, { "biases" }, { 2, 2 }, {0, 0}, { 1, 1 }, tensor{ 1, 2, 3, 2 }), reorder("out", input_info("conv"), format::bfyx, data_types::f32)); - build_options opts; - opts.set_option(build_option::optimize_data(true)); - network network(engine, topology, opts); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -4155,9 +4155,9 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_weight_an { 2, 2 }, { 0, 0 }, { 1, 1 }, tensor{ 1, 2, 3, 2 }, false), reorder("out", input_info("conv"), format::bfyx, data_types::f32)); - build_options opts; - opts.set_option(build_option::optimize_data(true)); - network network(engine, topology, opts); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -4226,9 +4226,9 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activatio { 2, 2 }, { 0, 0 }, { 1, 1 }, tensor{ 1, 2, 3, 2 }, false), reorder("out", input_info("conv"), format::bfyx, data_types::f32)); - build_options opts; - opts.set_option(build_option::optimize_data(true)); - network network(engine, topology, opts); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -4311,9 +4311,9 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activatio { 2, 2 }, { 0, 0 }, { 1, 1 }, tensor{ 1, 2, 3, 2 }, 
false), reorder("out", input_info("conv"), format::bfyx, data_types::f32)); - build_options opts; - opts.set_option(build_option::optimize_data(true)); - network network(engine, topology, opts); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -4412,9 +4412,9 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activatio { 2, 2 }, { 0, 0 }, { 1, 1 }, tensor{ 1, 2, 3, 2 }, data_types::f32, false), reorder("out", input_info("conv"), format::bfyx, data_types::f32)); - build_options opts; - opts.set_option(build_option::optimize_data(true)); - network network(engine, topology, opts); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -4483,9 +4483,9 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_weights_p { 2, 2 }, { 0, 0 }, { 1, 1 }, tensor{ 1, 2, 3, 2 }, false), reorder("out", input_info("conv"), format::bfyx, data_types::f32)); - build_options opts; - opts.set_option(build_option::optimize_data(true)); - network network(engine, topology, opts); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -5034,11 +5034,11 @@ TEST_P(convolution_gpu_fs_byx_fsv32, fs_byx_fsv32) } - build_options options; - implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" }; - options.set_option(build_option::force_implementations({ { "conv_fsv", conv_impl } })); - options.set_option(build_option::optimize_data(true)); - network network(engine, topology, options); + ExecutionConfig config; + ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "" }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -5135,11 +5135,11 @@ TEST(convolution_f16_fsv_gpu, convolution_f16_fsv_gpu_padding) { topology.add(conv_fsv); - build_options options; - implementation_desc conv_impl = { format::fs_b_yx_fsv32, "convolution_gpu_bfyx_to_fs_byx_fsv32" }; - options.set_option(build_option::force_implementations({ { "conv_fsv", conv_impl } })); - options.set_option(build_option::optimize_data(true)); - network network(engine, topology, options); + ExecutionConfig config; + ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "convolution_gpu_bfyx_to_fs_byx_fsv32" }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -5342,11 +5342,11 @@ TEST_P(convolution_gpu_fs_byx_fsv32_crop, fs_byx_fsv32_crop) } } - build_options options; - implementation_desc conv_impl = { format::fs_b_yx_fsv32, "convolution_gpu_bfyx_to_fs_byx_fsv32" }; - options.set_option(build_option::force_implementations({ { "conv_fsv", conv_impl } })); - options.set_option(build_option::optimize_data(true)); - network network(engine, topology, options); + ExecutionConfig config; + 
ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "convolution_gpu_bfyx_to_fs_byx_fsv32" }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -5416,9 +5416,9 @@ TEST(convolution_f32_fw_gpu, convolution_int8_b_fs_yx_fsv4_to_bfyx) { padding{ { 0, 0, output_padding, output_padding }, 0 }), reorder("output", input_info("conv"), { data_types::f32, format::bfyx, { batch_num, input_f, input_size_x, input_size_y } })); - build_options build_opt; + ExecutionConfig config_ref; - network network_ref(engine, topology_ref, build_opt); + network network_ref(engine, topology_ref, config_ref); network_ref.set_input_data("input", input); auto outputs = network_ref.execute(); @@ -5438,11 +5438,11 @@ TEST(convolution_f32_fw_gpu, convolution_int8_b_fs_yx_fsv4_to_bfyx) { padding{ { 0, 0, output_padding, output_padding }, 0 }), reorder("output", input_info("conv"), { data_types::f32,format::bfyx, { batch_num, input_f, input_size_x, input_size_y } })); - build_options build_opt_act; + ExecutionConfig config_act; - build_opt_act.set_option(build_option::optimize_data(true)); + config_act.set_property(ov::intel_gpu::optimize_data(true)); - network network_act(engine, topology_act, build_opt_act); + network network_act(engine, topology_act, config_act); network_act.set_input_data("input", input); auto outputs_act = network_act.execute(); @@ -5578,10 +5578,10 @@ TEST(convolution_gpu, bfyx_iyxo_5x5_fp16) } - build_options options; - implementation_desc conv_impl = { format::bfyx, "" }; - options.set_option(build_option::optimize_data(true)); - network network(engine, topology, options); + ExecutionConfig config; + ov::intel_gpu::ImplementationDesc conv_impl = { format::bfyx, "" }; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -5805,10 +5805,10 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32) topology.add(reorder("reorder_bfzyx", input_info("conv_bsv16_fsv16"), format::bfzyx, data_types::f32)); - build_options options; - options.set_option(build_option::optimize_data(true)); - options.set_option(build_option::outputs({ "conv_bsv16_fsv16", "reorder_bfzyx" })); - network network(engine, topology, options); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv_bsv16_fsv16", "reorder_bfzyx" })); + network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -5942,10 +5942,10 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp16) topology.add(reorder("reorder_bfzyx", input_info("conv_bsv16_fsv16"), format::bfzyx, data_types::f16)); - build_options options; - options.set_option(build_option::optimize_data(true)); - options.set_option(build_option::outputs({ "conv_bsv16_fsv16", "reorder_bfzyx" })); - network network(engine, topology, options); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv_bsv16_fsv16", "reorder_bfzyx" })); + network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -6078,10 +6078,10 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32_fused_ops) 
topology.add(reorder("reorder_bfzyx", input_info("scale"), format::bfzyx, data_types::f32)); - build_options options; - options.set_option(build_option::optimize_data(true)); - options.set_option(build_option::outputs({ "conv_bsv16_fsv16", "reorder_bfzyx" })); - network network(engine, topology, options); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv_bsv16_fsv16", "reorder_bfzyx" })); + network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -6239,12 +6239,12 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32) topology.add(reorder("reorder_bfyx", input_info("conv_bsv16_fsv16"), format::bfyx, data_types::f32)); - build_options options; - options.set_option(build_option::optimize_data(true)); - options.set_option(build_option::outputs({ "conv_bsv16_fsv16", "reorder_bfyx" })); - implementation_desc conv_impl = { format::bs_fs_yx_bsv16_fsv16, "" }; - options.set_option(build_option::force_implementations({ { "conv_bsv16_fsv16", conv_impl } })); - network network(engine, topology, options); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv_bsv16_fsv16", "reorder_bfyx" })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::bs_fs_yx_bsv16_fsv16, "" }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_bsv16_fsv16", conv_impl } })); + network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -6379,12 +6379,12 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp16) topology.add(reorder("reorder_bfyx", input_info("conv_bsv16_fsv16"), format::bfyx, data_types::f16)); - build_options options; - options.set_option(build_option::optimize_data(true)); - options.set_option(build_option::outputs({ "conv_bsv16_fsv16", "reorder_bfyx" })); - implementation_desc conv_impl = { format::bs_fs_yx_bsv16_fsv16, "" }; - options.set_option(build_option::force_implementations({ { "conv_bsv16_fsv16", conv_impl } })); - network network(engine, topology, options); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv_bsv16_fsv16", "reorder_bfyx" })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::bs_fs_yx_bsv16_fsv16, "" }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_bsv16_fsv16", conv_impl } })); + network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -6517,12 +6517,12 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32_fused_ops) topology.add(reorder("reorder_bfyx", input_info("scale"), format::bfyx, data_types::f32)); - build_options options; - options.set_option(build_option::optimize_data(true)); - options.set_option(build_option::outputs({ "conv_bsv16_fsv16", "reorder_bfyx" })); - implementation_desc conv_impl = { format::bs_fs_yx_bsv16_fsv16, "" }; - options.set_option(build_option::force_implementations({ { "conv_bsv16_fsv16", conv_impl } })); - network network(engine, topology, options); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv_bsv16_fsv16", "reorder_bfyx" })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::bs_fs_yx_bsv16_fsv16, "" 
}; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_bsv16_fsv16", conv_impl } })); + network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -6655,11 +6655,11 @@ TEST_P(convolution_depthwise_gpu, depthwise_conv_fs_b_yx_fsv32) topology.add(conv_fsv); - build_options options; - options.set_option(build_option::optimize_data(true)); - implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" }; - options.set_option(build_option::force_implementations({ { "conv_fsv", conv_impl } })); - network network(engine, topology, options); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "" }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); + network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -6798,11 +6798,11 @@ TEST_P(convolution_depthwise_gpu_fsv16, depthwise_conv_b_fs_yx_fsv16) topology.add(conv_fsv); - build_options options; - options.set_option(build_option::optimize_data(true)); - implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" }; - options.set_option(build_option::force_implementations({ { "conv_fsv", conv_impl } })); - network network(engine, topology, options); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "" }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); + network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -6929,11 +6929,11 @@ TEST_P(convolution_depthwise_gpu_fsv16_xy, depthwise_conv_b_fs_yx_fsv16) topology.add(conv_fsv); - build_options options; - options.set_option(build_option::optimize_data(true)); - implementation_desc conv_impl = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16_depthwise" }; - options.set_option(build_option::force_implementations({ { "conv_fsv", conv_impl } })); - network network(engine, topology, options); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16_depthwise" }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); + network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -7019,12 +7019,12 @@ TEST(convolution_depthwise_gpu_fsv16, depthwise_conv_b_fs_yx_fsv16_in_feature_pa convolution("conv", input_info("input_reordered"), { "weights" }, { "bias" }, num_groups, stride, pad, dilation, output_size, data_types::f32, true), reorder("out", input_info("conv"), format::bfyx, data_types::f32)); - build_options options; - options.set_option(build_option::optimize_data(true)); - implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" }; - options.set_option(build_option::force_implementations({ { "conv", conv_impl } })); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "" }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", conv_impl } })); - network network(engine, topology, options); + network network(engine, topology, config); 
network.set_input_data("input", input); auto outputs = network.execute(); @@ -7132,11 +7132,11 @@ TEST_P(convolution_depthwise_gpu_bfyx, depthwise_conv_bfyx) topology.add(conv_fsv); - build_options options; - options.set_option(build_option::optimize_data(true)); - implementation_desc conv_impl = { format::bfyx, "" }; - options.set_option(build_option::force_implementations({ { "conv", conv_impl } })); - network network(engine, topology, options); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + ov::intel_gpu::ImplementationDesc conv_impl = { format::bfyx, "" }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", conv_impl } })); + network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -7453,13 +7453,13 @@ TEST_P(convolution_grouped_gpu, base) { if (has_comp) topology.add(data(comp_prim_name[0], comp)); - build_options options; - options.set_option(build_option::optimize_data(true)); - options.set_option(build_option::outputs({ "conv", "out" })); - implementation_desc conv_impl = { input_data_format, impl_name }; - options.set_option(build_option::force_implementations({ { "conv", conv_impl } })); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv", "out" })); + ov::intel_gpu::ImplementationDesc conv_impl = { input_data_format, impl_name }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", conv_impl } })); - cldnn::network network(engine, topology, options); + cldnn::network network(engine, topology, config); network.set_input_data("input", input); network.execute(); @@ -7614,11 +7614,11 @@ TEST_P(convolution_general_gpu, conv_fp16_cases) { conv_fsv.output_paddings = {padding({ 0, 0, output_padding, output_padding }, 0.f)}; topology.add(conv_fsv); } - build_options options; - options.set_option(build_option::optimize_data(true)); - implementation_desc conv_impl = { input_data_format, impl_name }; - options.set_option(build_option::force_implementations({ { "conv_fsv", conv_impl } })); - network network(engine, topology, options); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + ov::intel_gpu::ImplementationDesc conv_impl = { input_data_format, impl_name }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); + network network(engine, topology, config); network.set_input_data("input", input_mem); network.execute(); @@ -7722,11 +7722,11 @@ TEST_P(convolution_gpu_fsv16_to_bfyx, conv_b_fs_yx_fsv16_to_bfyx_padding) topology.add(reorder_bfyx); // format 8 to 3 -> after fusing, removed // Exec ref network (non-fusing) - build_options options_ref; - options_ref.set_option(build_option::optimize_data(false)); - options_ref.set_option(build_option::allow_static_input_reorder(true)); + ExecutionConfig config_ref; + config_ref.set_property(ov::intel_gpu::optimize_data(false)); + config_ref.set_property(ov::intel_gpu::allow_static_input_reorder(true)); - network network_ref(engine, topology, options_ref); + network network_ref(engine, topology, config_ref); network_ref.set_input_data("input_origin", input_mem); auto ref_out = network_ref.execute(); @@ -7734,12 +7734,12 @@ TEST_P(convolution_gpu_fsv16_to_bfyx, conv_b_fs_yx_fsv16_to_bfyx_padding) cldnn::mem_lock ref_out_ptr(ref_out_mem, get_test_stream()); // Exec target network 
(fusing: conv+reorder) - build_options options_target; - implementation_desc conv_impl = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16" }; - options_target.set_option(build_option::force_implementations({ { "conv_fsv", conv_impl } })); - options_target.set_option(build_option::optimize_data(true)); + ExecutionConfig config_target; + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16" }; + config_target.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); + config_target.set_property(ov::intel_gpu::optimize_data(true)); - network network_target(engine, topology, options_target); + network network_target(engine, topology, config_target); network_target.set_input_data("input_origin", input_mem); auto target_out = network_target.execute(); @@ -7818,11 +7818,11 @@ TEST_P(convolution_gpu_fsv16_to_bfyx, conv_b_fs_yx_fsv16_to_bfyx_different_type) topology.add(reorder_bfyx); // format 8 to 3 -> after fusing, removed // Exec ref network (non-fusing) - build_options options_ref; - options_ref.set_option(build_option::optimize_data(false)); - options_ref.set_option(build_option::allow_static_input_reorder(true)); + ExecutionConfig config_ref; + config_ref.set_property(ov::intel_gpu::optimize_data(false)); + config_ref.set_property(ov::intel_gpu::allow_static_input_reorder(true)); - network network_ref(engine, topology, options_ref); + network network_ref(engine, topology, config_ref); network_ref.set_input_data("input_origin", input_mem); auto ref_out = network_ref.execute(); @@ -7830,12 +7830,12 @@ TEST_P(convolution_gpu_fsv16_to_bfyx, conv_b_fs_yx_fsv16_to_bfyx_different_type) cldnn::mem_lock ref_out_ptr(ref_out_mem, get_test_stream()); // Exec target network (fusing: conv+reorder) - build_options options_target; - implementation_desc conv_impl = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16" }; - options_target.set_option(build_option::force_implementations({ { "conv_fsv", conv_impl } })); - options_target.set_option(build_option::optimize_data(true)); + ExecutionConfig config_target; + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16" }; + config_target.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); + config_target.set_property(ov::intel_gpu::optimize_data(true)); - network network_target(engine, topology, options_target); + network network_target(engine, topology, config_target); network_target.set_input_data("input_origin", input_mem); auto target_out = network_target.execute(); @@ -7936,11 +7936,11 @@ public: auto topo = build_topology(engine); - auto build_opts = build_options( - build_option::optimize_data(true), - build_option::force_implementations({ { "conv", { input_format(), "" } } }) - ); - auto prog = program::build_program(engine, topo, build_opts); + ExecutionConfig config{ + ov::intel_gpu::optimize_data(true), + ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", { input_format(), "" } } }) + }; + auto prog = program::build_program(engine, topo, config); cldnn::network net(prog, 0); @@ -8296,11 +8296,11 @@ public: auto topo = this->build_topology(engine); - auto build_opts = build_options( - build_option::optimize_data(true), - build_option::force_implementations({ { "conv", { this->input_format(), "" } } }) - ); - auto prog = program::build_program(engine, topo, build_opts); + ExecutionConfig config{ + 
ov::intel_gpu::optimize_data(true), + ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", { this->input_format(), "" } } }) + }; + auto prog = program::build_program(engine, topo, config); cldnn::network net(prog, 0); @@ -8692,14 +8692,14 @@ public: for (cldnn::data_types data_type : data_types) { for (cldnn::format input_format : input_formats) { for (cldnn::format weights_format : weights_formats) { - cldnn::build_options network_build_options; + ExecutionConfig network_build_config; if (input_format == cldnn::format::bfyx) { - network_build_options.set_option(cldnn::build_option::optimize_data(true)); + network_build_config.set_property(ov::intel_gpu::optimize_data(true)); } for (cldnn::tensor input_size : input_tensor_size) { for (cldnn::tensor kernel_size : kernel_sizes) { for (auto output_features : output_features_sizes) { - std::shared_ptr params = std::make_shared(data_type, input_format, input_size.batch[0], input_size.feature[0], tensor(1, 1, input_size.spatial[0], input_size.spatial[1]), network_build_options); + std::shared_ptr params = std::make_shared(data_type, input_format, input_size.batch[0], input_size.feature[0], tensor(1, 1, input_size.spatial[0], input_size.spatial[1]), network_build_config); int input_features = params->input_layouts[0].feature(); params->input_layouts.push_back(cldnn::layout(params->data_type, weights_format, cldnn::tensor(output_features, input_features, kernel_size.spatial[0], kernel_size.spatial[1]))); // weights params->input_layouts.push_back(cldnn::layout(params->data_type, params->fmt, cldnn::tensor(1, 1, output_features, 1))); // biases @@ -8970,7 +8970,7 @@ INSTANTIATE_TEST_SUITE_P(conv_onednn_cases, TEST_P(convolution_gpu_onednn, conv_onednn_cases) { - auto& engine = get_onednn_test_engine(); + auto& engine = get_test_engine(); if (!engine.get_device_info().supports_immad) return; @@ -9071,11 +9071,12 @@ TEST_P(convolution_gpu_onednn, conv_onednn_cases) { conv_fsv.output_paddings = {padding({ 0, 0, 0, 0 }, 0.f)}; topology.add(conv_fsv); } - build_options options; - options.set_option(build_option::optimize_data(true)); - implementation_desc conv_impl = { format::byxf, impl_name, prim_impl_types }; - options.set_option(build_option::force_implementations({ { "conv_fsv", conv_impl } })); - network network(engine, topology, options); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + ov::intel_gpu::ImplementationDesc conv_impl = { format::byxf, impl_name, prim_impl_types }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); + config.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); + network network(engine, topology, config); network.set_input_data("input", input_mem); network.execute(); @@ -9113,7 +9114,7 @@ TEST_P(convolution_gpu_onednn, conv_onednn_cases) { } TEST(convolution_gpu_onednn, padding_for_cldnn_kernel_after_onednn) { - auto& engine = get_onednn_test_engine(); + auto& engine = get_test_engine(); if (!engine.get_device_info().supports_immad) return; @@ -9142,20 +9143,22 @@ TEST(convolution_gpu_onednn, padding_for_cldnn_kernel_after_onednn) { topology topology_test(input, weights, input_reorder, conv1, conv2, output_reorder); topology topology_ref(input, weights, input_reorder, conv1, conv2, output_reorder); - build_options options_test; - implementation_desc conv1_impl_test = { format::byxf, "", impl_types::onednn }; - implementation_desc conv2_impl_test = { format::b_fs_yx_fsv16, 
"convolution_gpu_bfyx_f16", impl_types::ocl }; - options_test.set_option(build_option::force_implementations({ { "conv1", conv1_impl_test }, { "conv2", conv2_impl_test } })); - options_test.set_option(build_option::optimize_data(true)); + ExecutionConfig config_test; + ov::intel_gpu::ImplementationDesc conv1_impl_test = { format::byxf, "", impl_types::onednn }; + ov::intel_gpu::ImplementationDesc conv2_impl_test = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16", impl_types::ocl }; + config_test.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv1", conv1_impl_test }, { "conv2", conv2_impl_test } })); + config_test.set_property(ov::intel_gpu::optimize_data(true)); + config_test.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); - build_options options_ref; - implementation_desc conv1_impl_ref = { format::bfyx, "", impl_types::ocl }; - implementation_desc conv2_impl_ref = { format::bfyx, "", impl_types::ocl }; - options_ref.set_option(build_option::force_implementations({ { "conv1", conv1_impl_ref }, { "conv2", conv2_impl_ref } })); - options_ref.set_option(build_option::optimize_data(true)); + ExecutionConfig config_ref; + ov::intel_gpu::ImplementationDesc conv1_impl_ref = { format::bfyx, "", impl_types::ocl }; + ov::intel_gpu::ImplementationDesc conv2_impl_ref = { format::bfyx, "", impl_types::ocl }; + config_ref.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv1", conv1_impl_ref }, { "conv2", conv2_impl_ref } })); + config_ref.set_property(ov::intel_gpu::optimize_data(true)); + config_ref.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); - network network_test(engine, topology_test, options_test); - network network_ref(engine, topology_ref, options_ref); + network network_test(engine, topology_test, config_test); + network network_ref(engine, topology_ref, config_ref); network_test.set_input_data("input", input_mem); network_ref.set_input_data("input", input_mem); @@ -9259,16 +9262,16 @@ void test_convolution_f32_gpu_convolution_gpu_bfyx_f16_depthwise_x_bloxk_size_1( topology.add(conv_fsv); - build_options options; - options.set_option(build_option::optimize_data(true)); - implementation_desc conv_impl = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16_depthwise" }; - options.set_option(build_option::force_implementations({ { "conv_fsv", conv_impl } })); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16_depthwise" }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); cldnn::network::ptr network; if (is_caching_test) { membuf mem_buf; { - cldnn::network _network(engine, topology, options); + cldnn::network _network(engine, topology, config); std::ostream out_mem(&mem_buf); BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem); _network.save(ob); @@ -9279,7 +9282,7 @@ void test_convolution_f32_gpu_convolution_gpu_bfyx_f16_depthwise_x_bloxk_size_1( network = std::make_shared(ib, get_test_stream_ptr(), engine); } } else { - network = std::make_shared(engine, topology, options); + network = std::make_shared(engine, topology, config); } network->set_input_data("input", input_mem); diff --git a/src/plugins/intel_gpu/tests/test_cases/crop_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/crop_gpu_test.cpp index 372ff885938..510058d246d 100644 --- 
a/src/plugins/intel_gpu/tests/test_cases/crop_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/crop_gpu_test.cpp @@ -757,11 +757,11 @@ TEST(crop_gpu, basic_in1x4x1x1_split) { std::vector out1 = { -1.f, 2.f,-3.f }; std::vector out2 = { 4.f, }; set_values(input, input_vec); - build_options bo; - bo.set_option(build_option::optimize_data(true)); - bo.set_option(build_option::outputs(topology.get_primitives_ids())); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::custom_outputs(topology.get_primitives_ids())); - network network(engine, topology, bo); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -805,10 +805,10 @@ TEST(crop_gpu, basic_in1x4x1x1_crop_pad) { std::vector input_vec = { -1.f, 2.f, -3.f, 4.f }; std::vector out1 = { -1.f, 2.f,-3.f }; set_values(input, input_vec); - build_options bo; - bo.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); - network network(engine, topology, bo); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -873,11 +873,11 @@ TEST(crop_gpu, basic_i32_in1x4x1x1_split) { std::vector out1 = { -1, 2,-3 }; std::vector out2 = { 4, }; set_values(input, input_vec); - build_options bo; - bo.set_option(build_option::optimize_data(true)); - bo.set_option(build_option::outputs(topology.get_primitives_ids())); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::custom_outputs(topology.get_primitives_ids())); - network network(engine, topology, bo); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -948,11 +948,11 @@ TEST(crop_gpu, basic_i64_in1x4x1x1_split) { std::vector out1 = { -1, 2,-3 }; std::vector out2 = { 4, }; set_values(input, input_vec); - build_options bo; - bo.set_option(build_option::optimize_data(true)); - bo.set_option(build_option::outputs(topology.get_primitives_ids())); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::custom_outputs(topology.get_primitives_ids())); - network network(engine, topology, bo); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -999,8 +999,7 @@ TEST(crop_gpu, basic_in1x4x1x1_split_w_relu) { // Out2: // f0: 4.0 // disable memory pool when we want to check optimized out internal results - engine_configuration cfg{ false, queue_types::out_of_order, std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, false /*mem_pool*/ }; - auto engine = engine::create(engine_types::ocl, runtime_types::ocl, cfg); + auto engine = engine::create(engine_types::ocl, runtime_types::ocl); auto batch_num = 1; auto feature_num = 4; auto x_size = 1; @@ -1026,11 +1025,12 @@ TEST(crop_gpu, basic_in1x4x1x1_split_w_relu) { std::vector out1 = { 0.f, 2.f,0.f }; std::vector out2 = { 4.f, }; set_values(input, input_vec); - build_options bo; - bo.set_option(build_option::optimize_data(true)); - bo.set_option(build_option::debug(true)); //required to have optimized crop despite the fact that it's specified as an output - network network(*engine, topology, bo); + ExecutionConfig cfg{ + ov::intel_gpu::enable_memory_pool(false), + 
ov::intel_gpu::optimize_data(true) + }; + network network(*engine, topology, cfg); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1038,7 +1038,7 @@ TEST(crop_gpu, basic_in1x4x1x1_split_w_relu) { cldnn::mem_lock output_ptr(output, get_test_stream()); // check if crop has been executed in place - auto in_place = engine->is_the_same_buffer(*outputs.at("crop1").get_memory(), *outputs.at("relu").get_memory()); + auto in_place = engine->is_the_same_buffer(*network.get_output_memory("crop1"), *network.get_output_memory("relu")); ASSERT_TRUE(in_place); for (size_t i = 0; i < out1.size();i++) @@ -1210,15 +1210,15 @@ TEST_P(crop_gpu, pad_test) { res.insert(res.end(), res_data.begin(), res_data.end()); } set_values(input, input_vec); - build_options bo; - bo.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); cldnn::network::ptr network; if (is_caching_test) { membuf mem_buf; { - cldnn::network _network(engine, topology, bo); + cldnn::network _network(engine, topology, config); std::ostream out_mem(&mem_buf); BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem); _network.save(ob); @@ -1229,7 +1229,7 @@ TEST_P(crop_gpu, pad_test) { network = std::make_shared(ib, get_test_stream_ptr(), engine); } } else { - network = std::make_shared(engine, topology, bo); + network = std::make_shared(engine, topology, config); } network->set_input_data("input", input); @@ -1304,9 +1304,9 @@ TEST(crop_gpu, dynamic_i32_in2x3x2x2_crop_offsets) { 4, -5, 8, 8, -14, -15, -16, -17 }; set_values(input, input_vec); - build_options bo; - bo.set_option(build_option::allow_new_shape_infer(true)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network network(engine, topology, config); network.set_input_data("input", input); @@ -1363,12 +1363,12 @@ TEST(crop_gpu, dynamic_in1x4x1x1_split) { std::vector out1 = { -1, 2 }; std::vector out2 = { -3, 4 }; set_values(input_mem, input_vec); - build_options bo; - bo.set_option(build_option::allow_new_shape_infer(true)); - bo.set_option(build_option::optimize_data(true)); - bo.set_option(build_option::outputs(topology.get_primitives_ids())); + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::custom_outputs(topology.get_primitives_ids())); - network network(engine, topology, bo); + network network(engine, topology, config); network.set_input_data("input", input_mem); auto outputs = network.execute(); @@ -1425,12 +1425,12 @@ TEST(crop_gpu, dynamic_in1x4x1x1_varaidic_split) { set_values(axis_mem, {1}); set_values(splits_length_mem, splits_vec); - build_options bo; - bo.set_option(build_option::allow_new_shape_infer(true)); - bo.set_option(build_option::optimize_data(true)); - bo.set_option(build_option::outputs(topology.get_primitives_ids())); + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::custom_outputs(topology.get_primitives_ids())); - network network(engine, topology, bo); + network network(engine, topology, config); network.set_input_data("input", input_mem); auto outputs = network.execute(); @@ -1471,11 +1471,11 @@ TEST(crop_gpu, static_split_batch) { set_values(input_mem, input_vec); - build_options bo; - 
bo.set_option(build_option::optimize_data(true)); - bo.set_option(build_option::outputs(topology.get_primitives_ids())); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::custom_outputs(topology.get_primitives_ids())); - network network(engine, topology, bo); + network network(engine, topology, config); network.set_input_data("input", input_mem); auto outputs = network.execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/cum_sum_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/cum_sum_gpu_test.cpp index 17752b29b09..a346fa1703e 100644 --- a/src/plugins/intel_gpu/tests/test_cases/cum_sum_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/cum_sum_gpu_test.cpp @@ -334,9 +334,9 @@ TEST(cum_sum_gpu_fp32, dynamic) { topology.add(input_layout("input", in_layout)); topology.add(cum_sum("cum_sum", input_info("input"))); - build_options bo; - bo.set_option(build_option::allow_new_shape_infer(true)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network network(engine, topology, config); network.set_input_data("input", input); auto inst = network.get_primitive("cum_sum"); diff --git a/src/plugins/intel_gpu/tests/test_cases/deconvolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/deconvolution_gpu_test.cpp index 2cbc01f8685..15f89d8a9e1 100644 --- a/src/plugins/intel_gpu/tests/test_cases/deconvolution_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/deconvolution_gpu_test.cpp @@ -584,8 +584,8 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2x2_in2x2x1x1_stride2_pad1) { // f1: 17 - 13 auto& engine = get_test_engine(); - cldnn::build_options options; - options.set_option(cldnn::build_option::optimize_data(true)); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); auto weights = engine.allocate_memory({ data_types::f32, format::yxio, { 2, 1, 2, 2 } }); @@ -604,7 +604,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2x2_in2x2x1x1_stride2_pad1) { reorder("plane_output", input_info("deconv"), format::yxfb, cldnn::data_types::f32) ); - network network(engine, topology, options); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -776,8 +776,8 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2x2_in2x2x1x1_stride2_pad1_input_padd // f1: 17 - 13 auto& engine = get_test_engine(); - cldnn::build_options options; - options.set_option(cldnn::build_option::optimize_data(true)); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 2, 2 } }); auto weights = engine.allocate_memory({ data_types::f32, format::yxio,{ 2, 1, 2, 2 } }); @@ -795,7 +795,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2x2_in2x2x1x1_stride2_pad1_input_padd deconvolution("deconv", input_info("reorder"), { "weights" }, { "biases" }, { 2, 2 }, { 1, 1 }) ); - network network(engine, topology, options); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -907,8 +907,8 @@ TYPED_TEST(deconvolution_basic, basic_f16_wsiz2x2_in2x2x1x2_bfyx_yxfb_stride2_pa auto weights = engine.allocate_memory({ data_types::f32, format::oiyx,{ 1, 1, 2, 2 } }); auto biases = 
engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); - cldnn::build_options options; - options.set_option(cldnn::build_option::optimize_data(true)); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); set_values(input, { FLOAT16(8.f), FLOAT16(0.5f), FLOAT16(6.f), FLOAT16(9.f), @@ -928,7 +928,7 @@ TYPED_TEST(deconvolution_basic, basic_f16_wsiz2x2_in2x2x1x2_bfyx_yxfb_stride2_pa reorder("plane_output", input_info("deconv"), format::bfyx, cldnn::data_types::f16) ); - network network(engine, topology, options); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1739,9 +1739,9 @@ TYPED_TEST(deconvolution_basic, basic_f16_k9x9_s2x2_pad4x4) { reorder("out", input_info("deconv_act"), format::bfyx, data_types::f16) ); - cldnn::build_options options; - options.set_option(cldnn::build_option::optimize_data(true)); - network network_act(engine, topology_act, options); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network_act(engine, topology_act, config); network_act.set_input_data("input_act", input); auto outputs_act = network_act.execute(); @@ -1797,12 +1797,12 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_b_fs_yx_fsv16_stride2_pad reorder("out", input_info("deconv"), format::bfyx, data_types::f32) ); - cldnn::build_options options; - implementation_desc impl = { format::b_fs_yx_fsv16, "" }; - options.set_option(cldnn::build_option::optimize_data(true)); - options.set_option(build_option::force_implementations({ {"deconv", impl} })); + ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, "" }; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); - network network(engine, topology, options); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1868,12 +1868,12 @@ TEST(deconvolution_f16_fw_gpu, basic_wsiz2x2_in2x2x1x2_b_fs_yx_fsv16_stride2_pad reorder("out", input_info("deconv"), format::bfyx, data_types::f16) ); - cldnn::build_options options; - implementation_desc impl = { format::b_fs_yx_fsv16, "" }; - options.set_option(cldnn::build_option::optimize_data(true)); - options.set_option(build_option::force_implementations({ {"deconv", impl} })); + ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, "" }; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); - network network(engine, topology, options); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1917,12 +1917,12 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_b_fs_yx_fsv16_stride2_pad reorder("out", input_info("deconv"), format::bfyx, data_types::f32) ); - cldnn::build_options options; - implementation_desc impl = { format::b_fs_yx_fsv16, "" }; - options.set_option(cldnn::build_option::optimize_data(true)); - options.set_option(build_option::force_implementations({ {"deconv", impl} })); + ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, "" }; + 
config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); - network network(engine, topology, options); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1965,12 +1965,12 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_b_fs_yx_fsv16_stride2_pad reorder("out", input_info("deconv"), format::bfyx, data_types::f32) ); - cldnn::build_options options; - implementation_desc impl = { format::b_fs_yx_fsv16, "" }; - options.set_option(cldnn::build_option::optimize_data(true)); - options.set_option(build_option::force_implementations({ {"deconv", impl} })); + ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, "" }; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); - network network(engine, topology, options); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2011,12 +2011,12 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_nopad_b_fs_yx_fsv16_dw) { reorder("out", input_info("deconv"), format::bfyx, data_types::f32) ); - cldnn::build_options options; - implementation_desc impl = { format::b_fs_yx_fsv16, "" }; - options.set_option(cldnn::build_option::optimize_data(true)); - options.set_option(build_option::force_implementations({ {"deconv", impl} })); + ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, "" }; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); - network network(engine, topology, options); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2065,12 +2065,12 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_pad1_b_fs_yx_fsv16_dw) { reorder("out", input_info("deconv"), format::bfyx, data_types::f32) ); - cldnn::build_options options; - implementation_desc impl = { format::b_fs_yx_fsv16, "" }; - options.set_option(cldnn::build_option::optimize_data(true)); - options.set_option(build_option::force_implementations({ {"deconv", impl} })); + ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, "" }; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); - network network(engine, topology, options); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2107,12 +2107,12 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride2_nopad_b_fs_yx_fsv reorder("out", input_info("deconv"), format::bfyx, data_types::f32) ); - cldnn::build_options options; - implementation_desc impl = { format::b_fs_yx_fsv16, "" }; - options.set_option(cldnn::build_option::optimize_data(true)); - options.set_option(build_option::force_implementations({ {"deconv", impl} })); + ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, "" }; + config.set_property(ov::intel_gpu::optimize_data(true)); + 
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); - network network(engine, topology, options); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2163,12 +2163,12 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride4_pad2_b_fs_yx_fsv1 reorder("out", input_info("deconv"), format::bfyx, data_types::f32) ); - cldnn::build_options options; - implementation_desc impl = { format::b_fs_yx_fsv16, "" }; - options.set_option(cldnn::build_option::optimize_data(true)); - options.set_option(build_option::force_implementations({ {"deconv", impl} })); + ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, "" }; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); - network network(engine, topology, options); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2219,12 +2219,12 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride4_pad2_b_fs_yx_fsv1 reorder("out", input_info("deconv"), format::bfyx, data_types::f32) ); - cldnn::build_options options; - implementation_desc impl = { format::b_fs_yx_fsv16, "" }; - options.set_option(cldnn::build_option::optimize_data(true)); - options.set_option(build_option::force_implementations({ {"deconv", impl} })); + ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, "" }; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); - network network(engine, topology, options); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2304,12 +2304,12 @@ TEST(deconvolution_f32_fw_gpu, bs_fs_zyx_bsv16_fsv16_wsiz2x2x2_in1x1x2x2x2_strid reorder("out", input_info("deconv"), format::bfzyx, data_types::f32) ); - cldnn::build_options options; - implementation_desc impl = { format::bs_fs_zyx_bsv16_fsv16, "" }; - options.set_option(cldnn::build_option::optimize_data(true)); - options.set_option(build_option::force_implementations({ {"deconv", impl} })); + ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ImplementationDesc impl = { format::bs_fs_zyx_bsv16_fsv16, "" }; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); - network network(engine, topology, options); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2357,15 +2357,15 @@ void test_deconvolution_f16_fw_gpu_basic_wsiz2x2_in1x2x2x2_fs_b_yx_fsv32_stride1 reorder("out", input_info("deconv"), format::bfyx, data_types::f32) ); - cldnn::build_options options; - options.set_option(cldnn::build_option::optimize_data(true)); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); cldnn::network::ptr network; if (is_caching_test) { membuf mem_buf; { - cldnn::network _network(engine, topology, options); + cldnn::network _network(engine, topology, config); std::ostream out_mem(&mem_buf); BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem); _network.save(ob); @@ -2376,7 
+2376,7 @@ void test_deconvolution_f16_fw_gpu_basic_wsiz2x2_in1x2x2x2_fs_b_yx_fsv32_stride1 network = std::make_shared(ib, get_test_stream_ptr(), engine); } } else { - network = std::make_shared(engine, topology, options); + network = std::make_shared(engine, topology, config); } network->set_input_data("input", input); @@ -2421,7 +2421,7 @@ struct deconvolution_random_test_params { ov::CoordinateDiff pad; bool with_bias; data_types output_type; - cldnn::implementation_desc deconv_desc; + ov::intel_gpu::ImplementationDesc deconv_desc; static std::string print_params(const testing::TestParamInfo& param_info) { auto& param = param_info.param; @@ -2600,7 +2600,7 @@ public: type_test_ranges::max); } - void run(cldnn::engine& eng, const deconvolution_random_test_params& params, cldnn::build_options build_opts) { + void run(cldnn::engine& eng, const deconvolution_random_test_params& params, ExecutionConfig config) { uint32_t groups = params.weights_size.group[0]; size_t ifm = params.weights_size.feature[0]; size_t ofm = params.weights_size.batch[0]; @@ -2638,14 +2638,14 @@ public: // turn off optimizer to check blocked format without reordering to plane format if (params.deconv_desc.output_format == cldnn::format::any && !format::is_simple_data_format(in_layout.format)) { - build_opts.set_option(build_option::optimize_data(false)); + config.set_property(ov::intel_gpu::optimize_data(false)); } if (!params.deconv_desc.kernel_name.empty() || params.deconv_desc.output_format != cldnn::format::any) { - build_opts.set_option(cldnn::build_option::force_implementations({ { "deconv", params.deconv_desc } })); + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "deconv", params.deconv_desc } })); } - cldnn::network net(eng, topo, build_opts); + cldnn::network net(eng, topo, config); net.set_input_data("input", in_mem); auto result = net.execute(); @@ -2708,7 +2708,7 @@ public: class deconvolution_random_test : public testing::TestWithParam { protected: void SetUp() override { - build_opts.set_option(cldnn::build_option::optimize_data(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); } void run() { @@ -2731,14 +2731,14 @@ protected: } } - cldnn::build_options build_opts; + ov::intel_gpu::ExecutionConfig config; private: template void run_typed() { auto& params = GetParam(); deconvolution_random_test_base test; - test.run(get_test_engine(), params, build_opts); + test.run(get_test_engine(), params, config); } template @@ -2790,21 +2790,21 @@ public: std::vector batches = { 1, 2 }; for (auto b : batches) { // 1x1 - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 15, 7, 7}, wei_dt, format::oiyx, {15, 15, 1, 1}, {1, 1}, {0, 0}, true, out_dt, implementation_desc{out_fmt, ""} }); - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 15, 7, 7}, wei_dt, format::oiyx, {15, 15, 1, 1}, {2, 2}, {0, 0}, true, out_dt, implementation_desc{out_fmt, ""} }); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 15, 7, 7}, wei_dt, format::oiyx, {15, 15, 1, 1}, {1, 1}, {0, 0}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""} }); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 15, 7, 7}, wei_dt, format::oiyx, {15, 15, 1, 1}, {2, 2}, {0, 0}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""} }); // 3x3 - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 15, 7, 7}, wei_dt, format::oiyx, {15, 15, 3, 3}, {1, 1}, {1, 1}, true, out_dt, implementation_desc{out_fmt, ""}}); - 
push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 15, 7, 7}, wei_dt, format::oiyx, {15, 15, 3, 3}, {2, 2}, {1, 1}, true, out_dt, implementation_desc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 15, 7, 7}, wei_dt, format::oiyx, {15, 15, 3, 3}, {1, 1}, {1, 1}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 15, 7, 7}, wei_dt, format::oiyx, {15, 15, 3, 3}, {2, 2}, {1, 1}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); // Grouped - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 8, 7, 7}, wei_dt, format::goiyx, tensor(group(2), batch(16), feature(4), spatial(1, 1)), {1, 1}, {0, 0}, true, out_dt, implementation_desc{out_fmt, ""}}); - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 8, 7, 7}, wei_dt, format::goiyx, tensor(group(2), batch(16), feature(4), spatial(1, 1)), {2, 2}, {0, 0}, true, out_dt, implementation_desc{out_fmt, ""}}); - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 8, 7, 7}, wei_dt, format::goiyx, tensor(group(2), batch(16), feature(4), spatial(3, 3)), {1, 1}, {1, 1}, true, out_dt, implementation_desc{out_fmt, ""}}); - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 8, 7, 7}, wei_dt, format::goiyx, tensor(group(2), batch(16), feature(4), spatial(3, 3)), {2, 2}, {1, 1}, true, out_dt, implementation_desc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 8, 7, 7}, wei_dt, format::goiyx, tensor(group(2), batch(16), feature(4), spatial(1, 1)), {1, 1}, {0, 0}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 8, 7, 7}, wei_dt, format::goiyx, tensor(group(2), batch(16), feature(4), spatial(1, 1)), {2, 2}, {0, 0}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 8, 7, 7}, wei_dt, format::goiyx, tensor(group(2), batch(16), feature(4), spatial(3, 3)), {1, 1}, {1, 1}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 8, 7, 7}, wei_dt, format::goiyx, tensor(group(2), batch(16), feature(4), spatial(3, 3)), {2, 2}, {1, 1}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); // Depthwise - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 16, 7, 7}, wei_dt, format::goiyx, tensor(group(16), spatial(1, 1)), {1, 1}, {0, 0}, true, out_dt, implementation_desc{out_fmt, ""}}); - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 16, 7, 7}, wei_dt, format::goiyx, tensor(group(16), spatial(1, 1)), {2, 2}, {0, 0}, true, out_dt, implementation_desc{out_fmt, ""}}); - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 16, 7, 7}, wei_dt, format::goiyx, tensor(group(16), spatial(3, 3)), {1, 1}, {1, 1}, true, out_dt, implementation_desc{out_fmt, ""}}); - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 16, 7, 7}, wei_dt, format::goiyx, tensor(group(16), spatial(3, 3)), {2, 2}, {1, 1}, true, out_dt, implementation_desc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 16, 7, 7}, wei_dt, format::goiyx, tensor(group(16), spatial(1, 1)), {1, 1}, {0, 0}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 16, 7, 7}, wei_dt, format::goiyx, tensor(group(16), spatial(1, 1)), {2, 2}, {0, 0}, true, 
out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 16, 7, 7}, wei_dt, format::goiyx, tensor(group(16), spatial(3, 3)), {1, 1}, {1, 1}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 16, 7, 7}, wei_dt, format::goiyx, tensor(group(16), spatial(3, 3)), {2, 2}, {1, 1}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); } return *this; } @@ -2813,21 +2813,21 @@ public: std::vector batches = { 1, 2, 16, 32 }; for (auto b : batches) { // 1x1 - push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 15, 7, 7, 7}, wei_dt, format::oizyx, {15, 15, 1, 1, 1}, {1, 1, 1}, {0, 0, 0}, true, out_dt, implementation_desc{out_fmt, ""} }); - push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 15, 7, 7, 7}, wei_dt, format::oizyx, {15, 15, 1, 1, 1}, {2, 2, 2}, {0, 0, 0}, true, out_dt, implementation_desc{out_fmt, ""} }); + push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 15, 7, 7, 7}, wei_dt, format::oizyx, {15, 15, 1, 1, 1}, {1, 1, 1}, {0, 0, 0}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""} }); + push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 15, 7, 7, 7}, wei_dt, format::oizyx, {15, 15, 1, 1, 1}, {2, 2, 2}, {0, 0, 0}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""} }); // 3x3 - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 15, 7, 7, 7}, wei_dt, format::oizyx, {15, 15, 3, 3, 3}, {1, 1, 1}, {1, 1, 1}, true, out_dt, implementation_desc{out_fmt, ""}}); - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 15, 7, 7, 7}, wei_dt, format::oizyx, {15, 15, 3, 3, 3}, {2, 2, 2}, {1, 1, 1}, true, out_dt, implementation_desc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 15, 7, 7, 7}, wei_dt, format::oizyx, {15, 15, 3, 3, 3}, {1, 1, 1}, {1, 1, 1}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 15, 7, 7, 7}, wei_dt, format::oizyx, {15, 15, 3, 3, 3}, {2, 2, 2}, {1, 1, 1}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); // Grouped - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 8, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(2), batch(16), feature(4), spatial(1, 1, 1)), {1, 1, 1}, {0, 0, 0}, true, out_dt, implementation_desc{out_fmt, ""}}); - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 8, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(2), batch(16), feature(4), spatial(1, 1, 1)), {2, 2, 2}, {0, 0, 0}, true, out_dt, implementation_desc{out_fmt, ""}}); - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 8, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(2), batch(16), feature(4), spatial(3, 3, 3)), {1, 1, 1}, {1, 1, 1}, true, out_dt, implementation_desc{out_fmt, ""}}); - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 8, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(2), batch(16), feature(4), spatial(3, 3, 3)), {2, 2, 2}, {1, 1, 1}, true, out_dt, implementation_desc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 8, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(2), batch(16), feature(4), spatial(1, 1, 1)), {1, 1, 1}, {0, 0, 0}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 8, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(2), batch(16), feature(4), spatial(1, 1, 
1)), {2, 2, 2}, {0, 0, 0}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 8, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(2), batch(16), feature(4), spatial(3, 3, 3)), {1, 1, 1}, {1, 1, 1}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 8, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(2), batch(16), feature(4), spatial(3, 3, 3)), {2, 2, 2}, {1, 1, 1}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); // Depthwise - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 16, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(16), spatial(1, 1, 1)), {1, 1, 1}, {0, 0, 0}, true, out_dt, implementation_desc{out_fmt, ""}}); - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 16, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(16), spatial(1, 1, 1)), {2, 2, 2}, {0, 0, 0}, true, out_dt, implementation_desc{out_fmt, ""}}); - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 16, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(16), spatial(3, 3, 3)), {1, 1, 1}, {1, 1, 1}, true, out_dt, implementation_desc{out_fmt, ""}}); - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 16, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(16), spatial(3, 3, 3)), {2, 2, 2}, {1, 1, 1}, true, out_dt, implementation_desc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 16, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(16), spatial(1, 1, 1)), {1, 1, 1}, {0, 0, 0}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 16, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(16), spatial(1, 1, 1)), {2, 2, 2}, {0, 0, 0}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 16, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(16), spatial(3, 3, 3)), {1, 1, 1}, {1, 1, 1}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 16, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(16), spatial(3, 3, 3)), {2, 2, 2}, {1, 1, 1}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); } return *this; } @@ -2836,19 +2836,19 @@ public: std::vector batches = { 1, 2, 16 }; for (auto b : batches) { // 1x1 - push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17}, wei_dt, format::oiyx, {41, 31, 1, 1}, {1, 1}, {0, 0}, true, out_dt, implementation_desc{out_fmt, ""} }); - push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17}, wei_dt, format::oiyx, {41, 31, 1, 1}, {2, 2}, {0, 0}, true, out_dt, implementation_desc{out_fmt, ""} }); + push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17}, wei_dt, format::oiyx, {41, 31, 1, 1}, {1, 1}, {0, 0}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""} }); + push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17}, wei_dt, format::oiyx, {41, 31, 1, 1}, {2, 2}, {0, 0}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""} }); // 3x3 - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 31, 19, 17}, wei_dt, format::oiyx, {41, 31, 3, 3}, {1, 1}, {1, 1}, true, out_dt, implementation_desc{out_fmt, ""}}); - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 31, 19, 17}, wei_dt, format::oiyx, {41, 31, 3, 3}, {2, 2}, {1, 1}, true, out_dt, 
implementation_desc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 31, 19, 17}, wei_dt, format::oiyx, {41, 31, 3, 3}, {1, 1}, {1, 1}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 31, 19, 17}, wei_dt, format::oiyx, {41, 31, 3, 3}, {2, 2}, {1, 1}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); // Asymmetric weights - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 31, 19, 17}, wei_dt, format::oiyx, {41, 31, 3, 2}, {1, 1}, {1, 0}, true, out_dt, implementation_desc{out_fmt, ""}}); - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 31, 19, 17}, wei_dt, format::oiyx, {41, 31, 3, 2}, {2, 2}, {1, 0}, true, out_dt, implementation_desc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 31, 19, 17}, wei_dt, format::oiyx, {41, 31, 3, 2}, {1, 1}, {1, 0}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 31, 19, 17}, wei_dt, format::oiyx, {41, 31, 3, 2}, {2, 2}, {1, 0}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); // Uneven groups - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 27, 19, 17}, wei_dt, format::goiyx, tensor(group(3), batch(7), feature(9), spatial(1, 1)), {1, 1}, {0, 0}, true, out_dt, implementation_desc{out_fmt, ""}}); - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 27, 19, 17}, wei_dt, format::goiyx, tensor(group(3), batch(7), feature(9), spatial(1, 1)), {2, 2}, {0, 0}, true, out_dt, implementation_desc{out_fmt, ""}}); - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 27, 19, 17}, wei_dt, format::goiyx, tensor(group(3), batch(7), feature(9), spatial(3, 3)), {1, 1}, {1, 1}, true, out_dt, implementation_desc{out_fmt, ""}}); - push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 27, 19, 17}, wei_dt, format::goiyx, tensor(group(3), batch(7), feature(9), spatial(3, 3)), {2, 2}, {1, 1}, true, out_dt, implementation_desc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 27, 19, 17}, wei_dt, format::goiyx, tensor(group(3), batch(7), feature(9), spatial(1, 1)), {1, 1}, {0, 0}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 27, 19, 17}, wei_dt, format::goiyx, tensor(group(3), batch(7), feature(9), spatial(1, 1)), {2, 2}, {0, 0}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 27, 19, 17}, wei_dt, format::goiyx, tensor(group(3), batch(7), feature(9), spatial(3, 3)), {1, 1}, {1, 1}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); + push_back(deconvolution_random_test_params{in_dt, in_fmt, {b, 27, 19, 17}, wei_dt, format::goiyx, tensor(group(3), batch(7), feature(9), spatial(3, 3)), {2, 2}, {1, 1}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""}}); } return *this; } @@ -2857,19 +2857,19 @@ public: std::vector batches = { 1, 2, 16 }; for (auto b : batches) { // 1x1 - push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17, 11}, wei_dt, format::oizyx, {41, 31, 1, 1, 1}, {1, 1, 1}, {0, 0, 0}, true, out_dt, implementation_desc{out_fmt, ""} }); - push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17, 11}, wei_dt, format::oizyx, {41, 31, 1, 1, 1}, {2, 2, 2}, {0, 0, 0}, true, out_dt, implementation_desc{out_fmt, ""} }); + 
push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17, 11}, wei_dt, format::oizyx, {41, 31, 1, 1, 1}, {1, 1, 1}, {0, 0, 0}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""} }); + push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17, 11}, wei_dt, format::oizyx, {41, 31, 1, 1, 1}, {2, 2, 2}, {0, 0, 0}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""} }); // 3x3 - push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17, 11}, wei_dt, format::oizyx, {41, 31, 3, 3, 3}, {1, 1, 1}, {1, 1, 1}, true, out_dt, implementation_desc{out_fmt, ""} }); - push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17, 11}, wei_dt, format::oizyx, {41, 31, 3, 3, 3}, {2, 2, 2}, {1, 1, 1}, true, out_dt, implementation_desc{out_fmt, ""} }); + push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17, 11}, wei_dt, format::oizyx, {41, 31, 3, 3, 3}, {1, 1, 1}, {1, 1, 1}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""} }); + push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17, 11}, wei_dt, format::oizyx, {41, 31, 3, 3, 3}, {2, 2, 2}, {1, 1, 1}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""} }); // Asymmetric weights - push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17, 11}, wei_dt, format::oizyx, {41, 31, 3, 2, 4}, {1, 1, 1}, {2, 1, 0}, true, out_dt, implementation_desc{out_fmt, ""} }); - push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17, 11}, wei_dt, format::oizyx, {41, 31, 3, 2, 4}, {2, 2, 2}, {2, 1, 0}, true, out_dt, implementation_desc{out_fmt, ""} }); + push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17, 11}, wei_dt, format::oizyx, {41, 31, 3, 2, 4}, {1, 1, 1}, {2, 1, 0}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""} }); + push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17, 11}, wei_dt, format::oizyx, {41, 31, 3, 2, 4}, {2, 2, 2}, {2, 1, 0}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""} }); // Uneven groups - push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 27, 19, 17, 11}, wei_dt, format::goizyx, tensor(group(3), batch(7), feature(9), spatial(1, 1, 1)), {1, 1, 1}, {0, 0, 0}, true, out_dt, implementation_desc{out_fmt, ""} }); - push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 27, 19, 17, 11}, wei_dt, format::goizyx, tensor(group(3), batch(7), feature(9), spatial(1, 1, 1)), {2, 2, 2}, {0, 0, 0}, true, out_dt, implementation_desc{out_fmt, ""} }); - push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 27, 19, 17, 11}, wei_dt, format::goizyx, tensor(group(3), batch(7), feature(9), spatial(3, 3, 3)), {1, 1, 1}, {1, 1, 1}, true, out_dt, implementation_desc{out_fmt, ""} }); - push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 27, 19, 17, 11}, wei_dt, format::goizyx, tensor(group(3), batch(7), feature(9), spatial(3, 3, 3)), {2, 2, 2}, {1, 1, 1}, true, out_dt, implementation_desc{out_fmt, ""} }); + push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 27, 19, 17, 11}, wei_dt, format::goizyx, tensor(group(3), batch(7), feature(9), spatial(1, 1, 1)), {1, 1, 1}, {0, 0, 0}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""} }); + push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 27, 19, 17, 11}, wei_dt, format::goizyx, tensor(group(3), batch(7), feature(9), spatial(1, 1, 1)), {2, 2, 2}, {0, 0, 0}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""} }); + 
push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 27, 19, 17, 11}, wei_dt, format::goizyx, tensor(group(3), batch(7), feature(9), spatial(3, 3, 3)), {1, 1, 1}, {1, 1, 1}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""} }); + push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 27, 19, 17, 11}, wei_dt, format::goizyx, tensor(group(3), batch(7), feature(9), spatial(3, 3, 3)), {2, 2, 2}, {1, 1, 1}, true, out_dt, ov::intel_gpu::ImplementationDesc{out_fmt, ""} }); } return *this; } @@ -2967,7 +2967,7 @@ TEST(deconvolution_f32_fw_gpu_onednn, basic_wsiz2x2_in2x2x1x1_stride2_nopad) { // Output : 4x4 // Stride : 2x2 - auto& engine = get_onednn_test_engine(); + auto& engine = get_test_engine(); if (!engine.get_device_info().supports_immad) return; @@ -2986,11 +2986,11 @@ TEST(deconvolution_f32_fw_gpu_onednn, basic_wsiz2x2_in2x2x1x1_stride2_nopad) { deconvolution("deconv", input_info("input"), { "weights" }, { "biases" }, { 2,2 }) ); - build_options bo; - implementation_desc conv_impl = { format::yxfb, "", impl_types::onednn }; - bo.set_option(build_option::force_implementations({ {"deconv", conv_impl} })); + ov::intel_gpu::ImplementationDesc conv_impl = { format::yxfb, "", impl_types::onednn }; - network network(engine, topology, bo); + ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), + ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", conv_impl} })}; + network network(engine, topology, cfg); network.set_input_data("input", input); auto outputs = network.execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/depth_concatenate_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/depth_concatenate_gpu_test.cpp index 5408af4c468..8edcf50bf91 100644 --- a/src/plugins/intel_gpu/tests/test_cases/depth_concatenate_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/depth_concatenate_gpu_test.cpp @@ -253,9 +253,9 @@ TEST(concatenate_f32_gpu, test_concatenation_of_pool_and_unpool) { topology.add(data("weights", weights)); topology.add(convolution("conv", input_info("concat1"), {"weights"})); - cldnn::build_options options; - options.set_option(cldnn::build_option::optimize_data(true)); - network network(engine, topology, options); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input1", input1); auto outputs = network.execute({}); @@ -288,9 +288,9 @@ TEST(depth_concatenate_f32_gpu, test03_cascade_concat_opt) { topology.add(concatenation("depth3", { input_info("relu4"), input_info("depth2") }, 1)); topology.add(activation("relu5", input_info("depth3"), activation_func::relu)); - cldnn::build_options options; - options.set_option(cldnn::build_option::optimize_data(true)); - network network(engine, topology, options); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input1", input1); @@ -340,9 +340,9 @@ TEST(depth_concatenate_f32_gpu, test04_fused_relu) { topology.add(concatenation("depth1", { input_info("input1"), input_info("input2") }, 1)); topology.add(activation("relu1", input_info("depth1"), activation_func::relu)); - cldnn::build_options options; - options.set_option(cldnn::build_option::optimize_data(true)); - network network(engine, topology, options); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network 
network(engine, topology, config); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -394,9 +394,9 @@ TEST(depth_concatenate_f32_gpu, test05_different_formats) { topology.add(concatenation("depth1", { input_info("reshape1"), input_info("reshape2") }, 1)); topology.add(reorder("output", input_info("depth1"), format::bfyx, data_types::f32)); - cldnn::build_options options; - options.set_option(cldnn::build_option::optimize_data(true)); - network network(engine, topology, options); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -453,10 +453,10 @@ TEST(depth_concatenate_f32_gpu, test06_padded_input) { topology.add(concatenation("depth2", { input_info("depth1"), input_info("conv") }, 1)); topology.add(reorder("output", input_info("depth2"), format::bfyx, data_types::f32)); - cldnn::build_options options; - options.set_option(cldnn::build_option::optimize_data(true)); - options.set_option(cldnn::build_option::force_implementations({ {"conv", implementation_desc{format::fs_b_yx_fsv32, ""} } })); - network network(engine, topology, options); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv", ov::intel_gpu::ImplementationDesc{format::fs_b_yx_fsv32, ""} } })); + network network(engine, topology, config); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -529,10 +529,10 @@ TEST(depth_concatenate_f32_gpu, test07_padded_output) { topology.add(convolution("conv", input_info("depth1"), { "weights" }, {1, 1}, {1, 1})); topology.add(reorder("output", input_info("conv"), format::bfyx, data_types::f32)); - cldnn::build_options options; - options.set_option(cldnn::build_option::optimize_data(true)); - options.set_option(cldnn::build_option::force_implementations({ {"conv", implementation_desc{format::fs_b_yx_fsv32, ""} } })); - network network(engine, topology, options); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv", ov::intel_gpu::ImplementationDesc{format::fs_b_yx_fsv32, ""} } })); + network network(engine, topology, config); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -589,9 +589,9 @@ TEST(depth_concatenate_f32_gpu, test07_concat_is_output) { topology.add(activation("actv2", input_info("input2"), activation_func::linear, { 0.5f, 0.0f })); topology.add(concatenation("depth1", { input_info("actv1"), input_info("actv2") }, 1)); - cldnn::build_options options; - options.set_option(cldnn::build_option::optimize_data(true)); - network network(engine, topology, options); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -620,7 +620,7 @@ TEST(depth_concatenate_f32_gpu, test07_concat_is_output) { TEST(depth_concatenate_f32_gpu, concat_with_different_format_inputs) { auto& engine = get_test_engine(); - build_options build_opt; + ExecutionConfig config; const int in1_f = 2, in2_f = 1; const int b = 2, x = 2, y = 4; auto input1 = 
engine.allocate_memory({ data_types::f32, format::yxfb,{ b, in1_f, y, x } }); @@ -655,8 +655,8 @@ TEST(depth_concatenate_f32_gpu, concat_with_different_format_inputs) { topology.add(concatenation("depth3", { input_info("depth1"), input_info("depth2") }, 1)); topology.add(concatenation("depth4", { input_info("depth3") }, 1)); - build_opt.set_option(build_option::optimize_data(true)); - network network(engine, topology, build_opt); + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -704,7 +704,7 @@ TEST(depth_concatenate_f32_gpu, concat_with_different_format_inputs) { TEST(depth_concatenate_f32_gpu, concat_with_reshape_input) { auto& engine = get_test_engine(); - build_options build_opt; + ExecutionConfig config; auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2,4,1,2 } }); std::vector values = { @@ -721,8 +721,8 @@ TEST(depth_concatenate_f32_gpu, concat_with_reshape_input) { topology.add(concatenation("depth1", { input_info("reshape") }, 1)); topology.add(concatenation("depth2", { input_info("depth1") }, 1)); - build_opt.set_option(build_option::optimize_data(true)); - network network(engine, topology, build_opt); + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input1", input1); @@ -742,7 +742,7 @@ TEST(depth_concatenate_f32_gpu, concat_with_reshape_input) { TEST(depth_concatenate_i32_gpu, optimize_data01) { auto& engine = get_test_engine(); - build_options build_opt; + ExecutionConfig config; auto input = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 1, 1}}); topology topology; @@ -756,8 +756,8 @@ TEST(depth_concatenate_i32_gpu, optimize_data01) { std::vector out_data = {4}; set_values(input, input_data); - build_opt.set_option(build_option::optimize_data(true)); - network network(engine, topology, build_opt); + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -769,7 +769,7 @@ TEST(depth_concatenate_i32_gpu, optimize_data01) { TEST(depth_concatenate_i32_gpu, optimize_data02) { auto& engine = get_test_engine(); - build_options build_opt; + ExecutionConfig config; auto input1 = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 2, 2}}); auto input2 = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 2, 2}}); auto input3 = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 2, 2}}); @@ -819,8 +819,8 @@ TEST(depth_concatenate_i32_gpu, optimize_data02) { set_values(input3, input_data3); set_values(input4, input_data4); - build_opt.set_option(build_option::optimize_data(true)); - network network(engine, topology, build_opt); + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input1", input1); network.set_input_data("input2", input2); network.set_input_data("input3", input3); @@ -836,7 +836,7 @@ TEST(depth_concatenate_i32_gpu, optimize_data02) { TEST(depth_concatenate_i32_gpu, optimize_data03) { auto& engine = get_test_engine(); - build_options build_opt; + ExecutionConfig config; auto input1 = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 2, 2}}); topology topology; @@ -860,8 +860,8 @@ TEST(depth_concatenate_i32_gpu, optimize_data03) { set_values(input1, input_data1); - 
build_opt.set_option(build_option::optimize_data(true)); - network network(engine, topology, build_opt); + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input1", input1); auto outputs = network.execute(); @@ -876,7 +876,7 @@ TEST(depth_concatenate_i32_gpu, optimize_data03) { TEST(depth_concatenate_i32_gpu, optimize_data04) { auto& engine = get_test_engine(); - build_options build_opt; + ExecutionConfig config; auto input1 = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 2, 2}}); topology topology; @@ -900,8 +900,8 @@ TEST(depth_concatenate_i32_gpu, optimize_data04) { set_values(input1, input_data1); - build_opt.set_option(build_option::optimize_data(true)); - network network(engine, topology, build_opt); + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input1", input1); auto outputs = network.execute(); @@ -916,7 +916,7 @@ TEST(depth_concatenate_i32_gpu, optimize_data04) { TEST(depth_concatenate_i32_gpu, optimize_data05) { auto& engine = get_test_engine(); - build_options build_opt; + ExecutionConfig config; auto input1 = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 2, 2}}); topology topology; @@ -941,8 +941,8 @@ TEST(depth_concatenate_i32_gpu, optimize_data05) { set_values(input1, input_data1); - build_opt.set_option(build_option::optimize_data(true)); - network network(engine, topology, build_opt); + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input1", input1); auto outputs = network.execute(); @@ -990,14 +990,14 @@ void test_depth_concatenate_f32_gpu_basic_bfwzyx_along_w(bool is_caching_test) { set_values(input1, input_data); - build_options build_opt; - build_opt.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); cldnn::network::ptr network; if (is_caching_test) { membuf mem_buf; { - cldnn::network _network(engine, topology, build_opt); + cldnn::network _network(engine, topology, config); std::ostream out_mem(&mem_buf); BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem); _network.save(ob); @@ -1008,7 +1008,7 @@ void test_depth_concatenate_f32_gpu_basic_bfwzyx_along_w(bool is_caching_test) { network = std::make_shared(ib, get_test_stream_ptr(), engine); } } else { - network = std::make_shared(engine, topology, build_opt); + network = std::make_shared(engine, topology, config); } network->set_input_data("input1", input1); diff --git a/src/plugins/intel_gpu/tests/test_cases/depth_to_space_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/depth_to_space_gpu_test.cpp index e898ca381ad..7ae466fe4c1 100644 --- a/src/plugins/intel_gpu/tests/test_cases/depth_to_space_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/depth_to_space_gpu_test.cpp @@ -245,10 +245,8 @@ TEST(depth_to_space_fp32_gpu, d112960540_bs2) { reshape("reshape2", input_info("perm"), tensor(1, 3, 2 * 960, 2 * 540)) ); - build_options build_opt; - - build_opt.set_option(build_option::optimize_data(true)); - network network_ref(engine, topology_ref, build_opt); + ExecutionConfig config(ov::intel_gpu::optimize_data(true)); + network network_ref(engine, topology_ref, config); network_ref.set_input_data("Input0", input1); auto outputs_ref = network_ref.execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/detection_output_test.cpp 
b/src/plugins/intel_gpu/tests/test_cases/detection_output_test.cpp index cfba1e05807..71e4f3e85b8 100644 --- a/src/plugins/intel_gpu/tests/test_cases/detection_output_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/detection_output_test.cpp @@ -147,8 +147,8 @@ public: topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k)); - build_options opts; - network network(engine, topology, opts); + ExecutionConfig config; + network network(engine, topology, config); network.set_input_data("input_location", input_location); network.set_input_data("input_confidence", input_confidence); network.set_input_data("input_prior_box", input_prior_box); @@ -182,8 +182,8 @@ public: topology.add(detection_output("detection_output_1", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k)); topology.add(detection_output("detection_output_2", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k)); - build_options opts; - network network(engine, topology, opts); + ExecutionConfig config; + network network(engine, topology, config); network.set_input_data("input_location", input_location); network.set_input_data("input_confidence", input_confidence); network.set_input_data("input_prior_box", input_prior_box); @@ -224,8 +224,8 @@ public: topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold)); - build_options opts; - network network(engine, topology, opts); + ExecutionConfig config; + network network(engine, topology, config); network.set_input_data("input_location", input_location); network.set_input_data("input_confidence", input_confidence); network.set_input_data("input_prior_box", input_prior_box); @@ -272,8 +272,8 @@ public: topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold)); - build_options opts; - network network(engine, topology, opts); + ExecutionConfig config; + network network(engine, topology, config); network.set_input_data("input_location", input_location); network.set_input_data("input_confidence", input_confidence); network.set_input_data("input_prior_box", input_prior_box); @@ -314,8 +314,8 @@ public: topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold)); - build_options opts; - network network(engine, topology, opts); + ExecutionConfig config; + network network(engine, topology, config); network.set_input_data("input_location", input_location); network.set_input_data("input_confidence", input_confidence); network.set_input_data("input_prior_box", input_prior_box); @@ -367,8 +367,8 @@ public: topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k)); - build_options opts; - network network(engine, topology, opts); + ExecutionConfig 
config; + network network(engine, topology, config); network.set_input_data("input_location", input_location); network.set_input_data("input_confidence", input_confidence); network.set_input_data("input_prior_box", input_prior_box); @@ -430,8 +430,8 @@ public: prior_coordinates_offset, prior_is_normalized, input_width, input_height, decrease_label_id )); - build_options opts; - network network(engine, topology, opts); + ExecutionConfig config; + network network(engine, topology, config); network.set_input_data("input_location", input_location); network.set_input_data("input_confidence", input_confidence); network.set_input_data("input_prior_box", input_prior_box); @@ -480,8 +480,8 @@ public: topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold)); - build_options opts; - network network(engine, topology, opts); + ExecutionConfig config; + network network(engine, topology, config); network.set_input_data("input_location", input_location); network.set_input_data("input_confidence", input_confidence); network.set_input_data("input_prior_box", input_prior_box); @@ -541,8 +541,8 @@ public: topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k)); - build_options opts; - network network(engine, topology, opts); + ExecutionConfig config; + network network(engine, topology, config); network.set_input_data("input_location", input_location); network.set_input_data("input_confidence", input_confidence); network.set_input_data("input_prior_box", input_prior_box); @@ -589,8 +589,8 @@ public: topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold)); - build_options opts; - network network(engine, topology, opts); + ExecutionConfig config; + network network(engine, topology, config); network.set_input_data("input_location", input_location); network.set_input_data("input_confidence", input_confidence); network.set_input_data("input_prior_box", input_prior_box); @@ -640,8 +640,8 @@ public: topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k)); - build_options opts; - network network(engine, topology, opts); + ExecutionConfig config; + network network(engine, topology, config); network.set_input_data("input_location", input_location); network.set_input_data("input_confidence", input_confidence); network.set_input_data("input_prior_box", input_prior_box); @@ -686,8 +686,8 @@ public: topology.add(detection_output("detection_output", input_info("input_location_padded"), input_info("input_confidence_padded"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k)); - build_options opts; - network network(engine, topology, opts); + ExecutionConfig config; + network network(engine, topology, config); network.set_input_data("input_location", input_location); network.set_input_data("input_confidence", input_confidence); 
network.set_input_data("input_prior_box", input_prior_box); @@ -749,13 +749,13 @@ public: prior_is_normalized, this->img_size, this->img_size )); - build_options opts; + ExecutionConfig config; cldnn::network::ptr network; if (is_caching_test) { membuf mem_buf; { - cldnn::network _network(engine, topology, opts); + cldnn::network _network(engine, topology, config); std::ostream out_mem(&mem_buf); BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem); _network.save(ob); @@ -766,7 +766,7 @@ public: network = std::make_shared(ib, get_test_stream_ptr(), engine); } } else { - network = std::make_shared(engine, topology, opts); + network = std::make_shared(engine, topology, config); } network->set_input_data("input_location", input_location); diff --git a/src/plugins/intel_gpu/tests/test_cases/eltwise_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/eltwise_gpu_test.cpp index 3a0703393ca..5ed1bc287c4 100644 --- a/src/plugins/intel_gpu/tests/test_cases/eltwise_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/eltwise_gpu_test.cpp @@ -1126,9 +1126,9 @@ TEST(eltwise_gpu_f32, dynamic_kernel_no_broadcast) { 15.f, 17.f, 8.f, 10.f, -2.f, 6.5f, -0.5f, -2.5f }); - build_options bo; - bo.set_option(build_option::allow_new_shape_infer(true)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network network(engine, topology, config); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -1182,9 +1182,9 @@ TEST(eltwise_gpu_f32, dynamic_kernel_broadcast) { set_values(input2, { 0.5f, -0.5f }); - build_options bo; - bo.set_option(build_option::allow_new_shape_infer(true)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network network(engine, topology, config); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -3527,10 +3527,10 @@ struct eltwise_same_input_test : testing::TestWithParam{"eltwise"})); - cldnn::network net(engine, topo, build_ops); + cldnn::network net(engine, topo, config); net.set_input_data("input1", input); net.set_input_data("input2", input); @@ -3691,9 +3691,9 @@ TEST_P(eltwise_test, fsv16) { topology.add(reorder("out", input_info("eltwise"), fmt_pln, data_types::f32)); primitive_id out_id = "out"; - build_options bo; - bo.set_option(build_option::optimize_data(true)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -3797,9 +3797,9 @@ TEST_P(eltwise_test_6d, bfwzyx) { topology.add(reorder("out", input_info("eltwise"), format::bfwzyx, data_types::f32)); primitive_id out_id = "out"; - build_options bo; - bo.set_option(build_option::optimize_data(true)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -3882,9 +3882,9 @@ TEST_P(eltwise_test_mixed_precision, fsv16) { topology.add(reorder("out", input_info("eltwise"), fmt_pln, data_types::f32)); primitive_id out_id = "out"; - build_options bo; - bo.set_option(build_option::optimize_data(true)); - network network(engine, topology, bo); + ExecutionConfig config; + 
config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -4117,11 +4117,11 @@ struct eltwise_random_test : testing::TestWithParam auto prim = eltwise("eltwise", { input_info("input1"), input_info("input2") }, params.mode); topo.add(prim); - auto build_ops = build_options(); - build_ops.set_option(build_option::outputs({"eltwise"})); - build_ops.set_option(build_option::force_implementations({ {"eltwise", {params.in_format, "generic_eltwise_ref"}} })); + ExecutionConfig config; + config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"eltwise"})); + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"eltwise", {params.in_format, "generic_eltwise_ref"}} })); - cldnn::network net(engine, topo, build_ops); + cldnn::network net(engine, topo, config); net.set_input_data("input1", input1); net.set_input_data("input2", input2); @@ -4134,15 +4134,15 @@ struct eltwise_random_test : testing::TestWithParam auto prim_opt = eltwise("eltwise_opt", { input_info("input1"), input_info("input2") }, params.mode); topo_opt.add(prim_opt); - auto buildops_opt = build_options(); - buildops_opt.set_option(build_option::outputs({"eltwise_opt"})); + ExecutionConfig config_opt; + config_opt.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"eltwise_opt"})); std::shared_ptr<cldnn::network> net_opt; if (is_caching_test) { membuf mem_buf; { - cldnn::network _network(engine, topo_opt, buildops_opt); + cldnn::network _network(engine, topo_opt, config_opt); std::ostream out_mem(&mem_buf); BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem); _network.save(ob); @@ -4150,10 +4150,10 @@ struct eltwise_random_test : testing::TestWithParam { std::istream in_mem(&mem_buf); BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine); - net_opt = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine); + net_opt = std::make_shared<cldnn::network>(ib, config_opt, get_test_stream_ptr(), engine); } } else { - net_opt = std::make_shared<cldnn::network>(engine, topo_opt, buildops_opt); + net_opt = std::make_shared<cldnn::network>(engine, topo_opt, config_opt); } net_opt->set_input_data("input1", input1); diff --git a/src/plugins/intel_gpu/tests/test_cases/fully_connected_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/fully_connected_gpu_test.cpp index 429dae7b690..492aefb2d50 100644 --- a/src/plugins/intel_gpu/tests/test_cases/fully_connected_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/fully_connected_gpu_test.cpp @@ -799,9 +799,9 @@ TEST(fully_connected_gpu, b_fs_yx_fsv4) topology.add(reorder_gold, reorder_imad); // Network build - build_options build_opt; - build_opt.set_option(build_option::optimize_data(true)); - network network(engine, topology, build_opt); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); // Network execution network.set_input_data("input", input); @@ -868,10 +868,10 @@ TEST(fully_connected_gpu, DISABLED_fs_byx_fsv32_b12) { ); // Set data optimization to allow weights reordering to optimal format - build_options opts; - opts.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); - network network(engine, topology, opts); + network network(engine, topology, config); network.set_input_data("input", input_prim); auto outputs = network.execute(); @@ -944,10 +944,10 @@ TEST(fully_connected_gpu, DISABLED_fs_byx_fsv32_b34) ); // Set data 
optimization to allow weights reordering to optimal format - build_options opts; - opts.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); - network network(engine, topology, opts); + network network(engine, topology, config); network.set_input_data("input", input_prim); auto outputs = network.execute(); @@ -1004,9 +1004,9 @@ struct fully_connected_random_test : ::testing::TestWithParam("input", input_format, std::move(input_data)); auto weights = net.add_data("weights", format::oiyx, std::move(weights_data)); auto bias = net.add_data("bias", format::bfyx, std::move(bias_data)); - auto fc = net.add_fully_connected("fc_prim", input, weights, bias, implementation_desc{ output_format, kernel }); + auto fc = net.add_fully_connected("fc_prim", input, weights, bias, ov::intel_gpu::ImplementationDesc{ output_format, kernel }); - net.run(build_options(build_option::optimize_data(true)), is_caching_test); + net.run(ExecutionConfig(ov::intel_gpu::optimize_data(true)), is_caching_test); } }; @@ -1127,9 +1127,9 @@ struct fully_connected_random_test_3d : ::testing::TestWithParam("input", input_format, std::move(input_data)); auto weights = net.add_data("weights", format::oiyx, std::move(weights_data)); auto bias = net.add_data("bias", format::bfyx, std::move(bias_data)); - auto fc = net.add_fully_connected_3d("fc_prim", input, weights, bias, implementation_desc{ output_format, kernel }, 3); + auto fc = net.add_fully_connected_3d("fc_prim", input, weights, bias, ov::intel_gpu::ImplementationDesc{ output_format, kernel }, 3); - net.run(build_options(build_option::optimize_data(true)), is_caching_test); + net.run(ExecutionConfig(ov::intel_gpu::optimize_data(true)), is_caching_test); } }; @@ -1393,10 +1393,10 @@ public: topo.add(reorder("output", input_info("quantization_prim"), format::bfyx, output_data_type())); - build_options build_opts; - build_opts.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); - network net(engine, topo, build_opts); + network net(engine, topo, config); net.set_input_data("input", input_prim); auto output = net.execute(); @@ -1661,7 +1661,7 @@ TEST(fully_connected_onednn_gpu, no_biases_int8) { const int32_t input_f = 3, input_b = 1, // size of the whole input buffer weight_b = 4, weight_f = 3; // size of the whole weights buffer - auto& engine = get_onednn_test_engine(); + auto& engine = get_test_engine(); if (!engine.get_device_info().supports_immad) return; @@ -1684,11 +1684,12 @@ TEST(fully_connected_onednn_gpu, no_biases_int8) { topology.add(ri); topology.add(rf); - cldnn::build_options force_options; - implementation_desc fc_impl = { format::bfyx, "", impl_types::onednn }; - force_options.set_option(build_option::force_implementations({ {"fc_prim", fc_impl} })); + ov::intel_gpu::ImplementationDesc fc_impl = { format::bfyx, "", impl_types::onednn }; - network network(engine, topology, force_options); + ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), + ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl} }) + }; + network network(engine, topology, cfg); network.set_input_data("input", input_prim); auto outputs = network.execute(); @@ -1714,7 +1715,7 @@ TEST(fully_connected_3d_onednn_gpu, no_biases_int8) { weight_o = 4, weight_i = 3, // size of the whole weights buffer output_b = 2, output_f = 4; - auto& engine = get_onednn_test_engine(); + auto& engine = 
get_test_engine(); if (!engine.get_device_info().supports_immad) return; @@ -1736,11 +1737,10 @@ TEST(fully_connected_3d_onednn_gpu, no_biases_int8) { topology.add(ri); topology.add(rf); - cldnn::build_options force_options; - implementation_desc fc_impl = { format::bfyx, "", impl_types::onednn }; - force_options.set_option(build_option::force_implementations({ { "fc_prim", fc_impl } })); + ov::intel_gpu::ImplementationDesc fc_impl = { format::bfyx, "", impl_types::onednn }; + ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "fc_prim", fc_impl } })}; - network network(engine, topology, force_options); + network network(engine, topology, cfg); network.set_input_data("input", input_prim); auto outputs = network.execute(); @@ -1778,10 +1778,10 @@ TEST(fully_connected_gpu, dynamic) { fully_connected("fc", input_info("input"), "weights") }; - build_options options; - options.set_option(build_option::optimize_data(true)); - options.set_option(cldnn::build_option::allow_new_shape_infer(true)); - network network(engine, topology, options); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network network(engine, topology, config); network.set_input_data("input", input_data); auto outputs = network.execute(); @@ -1828,10 +1828,10 @@ TEST(fully_connected_gpu, dynamic_multi_inference_same_shape) { fully_connected("fc", input_info("input"), "weights") }; - build_options options; - options.set_option(build_option::optimize_data(true)); - options.set_option(cldnn::build_option::allow_new_shape_infer(true)); - network network(engine, topology, options); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network network(engine, topology, config); { network.set_input_data("input", input_data1); @@ -1908,10 +1908,10 @@ TEST(fully_connected_gpu, dynamic_multi_inference_different_shape) { fully_connected("fc", input_info("input"), "weights") }; - build_options options; - options.set_option(build_option::optimize_data(true)); - options.set_option(cldnn::build_option::allow_new_shape_infer(true)); - network network(engine, topology, options); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network network(engine, topology, config); auto inst = network.get_primitive("fc"); auto impl = inst->get_impl(); @@ -1998,10 +1998,10 @@ TEST(fully_connected_gpu, dynamic_multi_inference_multiple_shapes) { fully_connected("fc", input_info("input"), "weights") }; - build_options options; - options.set_option(build_option::optimize_data(true)); - options.set_option(cldnn::build_option::allow_new_shape_infer(true)); - network network(engine, topology, options); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network network(engine, topology, config); // Call different shape multiple times to ensure caching works fine for (size_t i = 0; i < 2; i++) { diff --git a/src/plugins/intel_gpu/tests/test_cases/gather_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/gather_gpu_test.cpp index bc5e807e590..de030f29958 100644 --- a/src/plugins/intel_gpu/tests/test_cases/gather_gpu_test.cpp +++ 
b/src/plugins/intel_gpu/tests/test_cases/gather_gpu_test.cpp @@ -1889,9 +1889,9 @@ TEST(gather_gpu_fp32, dynamic_322_axisF) { topology.add(input_layout("input2", in2_layout)); topology.add(gather("gather", input_info("input1"), input_info("input2"), axis, ov::Shape{})); - build_options bo; - bo.set_option(build_option::allow_new_shape_infer(true)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network network(engine, topology, config); network.set_input_data("input1", input1); network.set_input_data("input2", input2); diff --git a/src/plugins/intel_gpu/tests/test_cases/gemm_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/gemm_gpu_test.cpp index f553ffaaa45..e2a9d07d91a 100644 --- a/src/plugins/intel_gpu/tests/test_cases/gemm_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/gemm_gpu_test.cpp @@ -272,9 +272,9 @@ TEST(gemm_gpu, basic_bfyx_t2_inplace_crop_with_pad) { gemm("output", { input_info("crop.1"), input_info("input2") }, data_types::f32, false, true) ); - build_options options; - options.set_option(build_option::optimize_data(true)); - network network(engine, topology, options); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input); network.set_input_data("input2", input2); auto outputs = network.execute(); @@ -319,10 +319,10 @@ TEST(gemm_gpu, dynamic) { gemm("gemm", { input_info("input1"), input_info("input2") }, data_types::f32, false, true, 1.0f, 0.0f, 4, 2) ); - build_options options; - options.set_option(build_option::optimize_data(true)); - options.set_option(build_option::allow_new_shape_infer(true)); - network network(engine, topology, options); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network network(engine, topology, config); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -1002,13 +1002,9 @@ public: } void execute(gemm_params& p) { -#ifdef ENABLE_ONEDNN_FOR_GPU - auto& engine = get_onednn_test_engine(); + auto& engine = get_test_engine(); if (!engine.get_device_info().supports_immad) return; -#else - auto& engine = get_test_engine(); -#endif auto y0_size = p.m_size; auto y0_pitch = p.k_size; auto x0_size = p.k_size; @@ -1108,15 +1104,16 @@ public: } topology.add(reorder("reorder_bfyx", input_info("gemm_bfyx"), format::bfyx, data_types::f32)); - build_options options; #ifdef ENABLE_ONEDNN_FOR_GPU - implementation_desc gemm_impl = { format::bfyx, "", impl_types::onednn }; + ov::intel_gpu::ImplementationDesc gemm_impl = { format::bfyx, "", impl_types::onednn }; + ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::in_order)); #else - implementation_desc gemm_impl = { format::bfyx, p.kernel_name }; + ov::intel_gpu::ImplementationDesc gemm_impl = { format::bfyx, p.kernel_name }; + ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); #endif - options.set_option(build_option::force_implementations({ {"gemm_bfyx", gemm_impl} })); + cfg.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"gemm_bfyx", gemm_impl} })); - network network(engine, topology, options); + network network(engine, topology, cfg); network.set_input_data("input0", input0_mem); network.set_input_data("input1", input1_mem); if (p.beta != 0) { diff --git 
a/src/plugins/intel_gpu/tests/test_cases/lstm_dynamic_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/lstm_dynamic_gpu_test.cpp index 7807df79a63..fbf72495412 100644 --- a/src/plugins/intel_gpu/tests/test_cases/lstm_dynamic_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/lstm_dynamic_gpu_test.cpp @@ -245,9 +245,9 @@ struct lstm_dynamic_input_layer_test : public ::testing::Test "weights", bias_id)); - build_options opts; - opts.set_option(build_option::optimize_data(true)); - network network(engine, topology, opts); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); #if MEASURE_PERF == true using clock = std::chrono::high_resolution_clock; @@ -407,9 +407,9 @@ struct lstm_dynamic_single_layer_test : public ::testing::Test initial_hidden_id, initial_cell_id)); - build_options opts; - opts.set_option(build_option::optimize_data(true)); - network network(engine, topology, opts); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input_mem); network.set_input_data("dyn_len", dynamic_length_mem); diff --git a/src/plugins/intel_gpu/tests/test_cases/memory_test.cpp b/src/plugins/intel_gpu/tests/test_cases/memory_test.cpp index 15bf558d47a..937a1772df8 100644 --- a/src/plugins/intel_gpu/tests/test_cases/memory_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/memory_test.cpp @@ -76,10 +76,10 @@ TEST(memory_pool, basic_non_padded_relu_pipe) { std::vector input_vec = { -1.f, 2.f, -3.f, 4.f }; set_values(input, input_vec); - build_options bo; - bo.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); - network network(*engine, topology, bo); + network network(*engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -108,10 +108,10 @@ TEST(memory_pool, basic_non_padded_relu_and_pooling_pipe) { topology.add(activation("relu4", input_info("relu3"), activation_func::relu)); topology.add(activation("relu5", input_info("relu4"), activation_func::relu)); - build_options bo; - bo.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); - network network(*engine, topology, bo); + network network(*engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -143,10 +143,10 @@ TEST(memory_pool, multi_outputs_network) { topology.add(activation("relu6", input_info("relu5"), activation_func::relu)); topology.add(activation("relu7", input_info("relu6"), activation_func::relu)); - build_options bo; - bo.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); - network network(*engine, topology, bo); + network network(*engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -181,10 +181,10 @@ TEST(memory_pool, oooq) { topology.add(concatenation("concat2", { input_info("relu4"), input_info("relu5") }, 1)); topology.add(activation("relu6", input_info("concat2"), activation_func::relu)); - build_options bo; - bo.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); - network network(*engine, topology, bo); + network network(*engine, topology, config); 
network.set_input_data("input", input); auto outputs = network.execute(); @@ -226,10 +226,10 @@ TEST(memory_pool, DISABLED_shared_mem_pool_same_topology_twice) { topology.add(concatenation("concat2", { input_info("relu4"), input_info("relu5") }, 1)); topology.add(activation("relu6", input_info("concat2"), activation_func::linear, { 1.0f, 0.5f })); - build_options bo; - bo.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); - network network_first(*engine, topology, bo); + network network_first(*engine, topology, config); network_first.set_input_data("input", input); auto outputs = network_first.execute(); @@ -239,7 +239,7 @@ TEST(memory_pool, DISABLED_shared_mem_pool_same_topology_twice) { ASSERT_EQ(engine->get_max_used_device_memory(), (uint64_t) 2560); - network network_second(*engine, topology, bo); + network network_second(*engine, topology, config); network_second.set_input_data("input", input); auto outputs_second = network_second.execute(); @@ -301,10 +301,10 @@ TEST(memory_pool, DISABLED_shared_mem_pool_same_topology_twice_weights) { convolution("conv", input_info("input"), { "weights" }, { 1, 1, 1, 2 }), softmax("softmax", input_info("conv"))); - build_options bo; - bo.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); - network network_first(*engine, topology, bo); + network network_first(*engine, topology, config); network_first.set_input_data("input", input); auto outputs = network_first.execute(); uint64_t cl_mem_result = 824; @@ -317,7 +317,7 @@ TEST(memory_pool, DISABLED_shared_mem_pool_same_topology_twice_weights) { auto output_layout_first = output_memory_first->get_layout(); cldnn::mem_lock output_ptr_first(output_memory_first, get_test_stream()); - network network_second(*engine, topology, bo); + network network_second(*engine, topology, config); network_second.set_input_data("input", input); auto outputs_second = network_second.execute(); @@ -387,10 +387,10 @@ TEST(memory_pool, shared_mem_pool_diff_batches) { convolution("conv", input_info("input"), { "weights" }, { 2, 1 }), softmax("softmax", input_info("conv"))); - build_options bo; - bo.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); - network network_first(*engine, topo, bo); + network network_first(*engine, topo, config); network_first.set_input_data("input", input_8); auto outputs = network_first.execute(); @@ -399,7 +399,7 @@ TEST(memory_pool, shared_mem_pool_diff_batches) { topo.change_input_layout("input", input_1->get_layout());//change input layout to batch=1 - network network_second(*engine, topo, bo); + network network_second(*engine, topo, config); network_second.set_input_data("input", input_1); auto outputs_second = network_second.execute(); ASSERT_EQ(engine->get_max_used_device_memory(), (uint64_t)5912); @@ -411,42 +411,19 @@ TEST(memory_pool, shared_dep_two_output) { // as it's tracked within engine instance auto engine = create_test_engine(); - auto batch_1 = 1; - auto feature_num = 1; - auto inp_x_size = 4; - auto inp_y_size = 4; - auto dt = data_types::f32; - auto fmt = format::bfyx; - layout lay_batch_1 = { dt, fmt,{ tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_1)) } }; - auto input_1 = engine->allocate_memory(lay_batch_1); + auto input_1 = engine->allocate_memory({ {1, 1, 4, 4}, data_types::f32, format::bfyx }); 
set_random_values(input_1); - //build primitives - auto constant_0_0 = cldnn::data( - "constant_0_0", - input_1 - ); - auto result_1_0 = cldnn::concatenation( - "result_1_0", - { input_info(constant_0_0) }, - 0 - ); - auto result_2_0 = cldnn::concatenation( - "result_2_0", - { input_info(constant_0_0) }, - 0 - ); - //build and execute network topology topo; - topo.add(constant_0_0); - topo.add(result_1_0); - topo.add(result_2_0); + topo.add(cldnn::data("constant_0_0", input_1)); + topo.add(cldnn::concatenation("result_1_0", { input_info("constant_0_0") }, 0)); + topo.add(cldnn::concatenation("result_2_0", { input_info("constant_0_0") }, 0)); - build_options bo; - bo.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); - network network(*engine, topo, bo); + network network(*engine, topo, config); auto outputs = network.execute(); ASSERT_EQ(engine->get_max_used_device_memory(), (uint64_t)192); } @@ -484,9 +461,8 @@ TEST(memory_pool, non_opt_intermidate_opt_after) { data_memory ); - build_options bo; - bo.set_option(build_option::optimize_data(false)); - network network(engine, topology, bo); + ExecutionConfig config(ov::intel_gpu::optimize_data(false)); + network network(engine, topology, config); network.set_input_data("input1", input_memory1); network.set_input_data("input2", input_memory2); auto outputs = network.execute(); @@ -533,9 +509,9 @@ TEST(memory_pool, add_mem_dep_test) { actv3, actv4 ); - build_options bo; - bo.set_option(build_option::optimize_data(true)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input1", input_memory1); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), static_cast(2)); diff --git a/src/plugins/intel_gpu/tests/test_cases/multiclass_nms_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/multiclass_nms_gpu_test.cpp index 6db2e58932f..6345eb21f14 100644 --- a/src/plugins/intel_gpu/tests/test_cases/multiclass_nms_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/multiclass_nms_gpu_test.cpp @@ -168,9 +168,9 @@ public: topology.add(primitive); topology.add(reorder("multiclass_nms", input_info("multiclass_nms_reordered"), plain_format, data_type)); - build_options bo; - bo.set_option(build_option::optimize_data(false)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(false)); + network network(engine, topology, config); network.set_input_data("input_boxes", input_boxes); network.set_input_data("input_scores", input_scores); diff --git a/src/plugins/intel_gpu/tests/test_cases/multiple_streams_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/multiple_streams_gpu_test.cpp index 212c2ba9203..a7abe8e326d 100644 --- a/src/plugins/intel_gpu/tests/test_cases/multiple_streams_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/multiple_streams_gpu_test.cpp @@ -19,13 +19,13 @@ using namespace ::tests; TEST(multistream_gpu, basic) { const int num_streams = 2; - auto config = InferenceEngine::CPUStreamsExecutor::Config(); - config._streams = num_streams; - auto task_executor = std::make_shared(config); + auto task_config = InferenceEngine::CPUStreamsExecutor::Config(); + task_config._streams = num_streams; + auto task_executor = std::make_shared(task_config); auto& engine = get_test_engine(); - build_options bo; - 
bo.set_option(build_option::allow_new_shape_infer(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); auto input1_dyn_layout = layout{ ov::PartialShape::dynamic(3), data_types::f16,format::bfyx }; auto input2_dyn_layout = layout{ ov::PartialShape::dynamic(3), data_types::f16,format::bfyx }; @@ -39,7 +39,7 @@ TEST(multistream_gpu, basic) { topology.add(fully_connected("fc", input_info("eltwise"), "weights")); topology.add(shape_of("shape_of", input_info("fc"), 3, data_types::i32)); - auto prog_ptr = program::build_program(engine, topology, bo); + auto prog_ptr = program::build_program(engine, topology, config); std::vector networks; for (size_t i = 0; i < num_streams; i++) { networks.push_back(network::allocate_network(engine, prog_ptr)); diff --git a/src/plugins/intel_gpu/tests/test_cases/mvn_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/mvn_gpu_test.cpp index 4164c727b1a..7f5d1e6e018 100644 --- a/src/plugins/intel_gpu/tests/test_cases/mvn_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/mvn_gpu_test.cpp @@ -337,9 +337,9 @@ TEST(mvn_gpu_test, dynamic_across_channels_inside_sqrt_bfyx_normalize_variance_f topology.add(input_layout("input", in_layout)); topology.add(mvn("mvn", input_info("input"), true, 1e-10f, true, true)); - build_options bo; - bo.set_option(build_option::allow_new_shape_infer(true)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network network(engine, topology, config); network.set_input_data("input", input); auto inst = network.get_primitive("mvn"); @@ -849,11 +849,11 @@ struct mvn_random_test_bsv32 : ::testing::TestWithParam { auto prim = mvn("mvn", input_info("input"), params.normalize_variance, 1e-10f, false, params.across_channels); prim.output_paddings = {output_pad}; topo.add(prim); - auto build_opts = build_options(); - build_opts.set_option(build_option::outputs({"mvn"})); - build_opts.set_option(build_option::force_implementations({ {"mvn", {format::type::bfyx, "mvn_gpu_bfyx_opt"}} })); + ExecutionConfig config; + config.set_property(ov::intel_gpu::custom_outputs(std::vector{"mvn"})); + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"mvn", {format::type::bfyx, "mvn_gpu_bfyx_opt"}} })); - network net(engine, topo, build_opts); + network net(engine, topo, config); net.set_input_data("input", input); auto outputs = net.execute(); @@ -865,11 +865,11 @@ struct mvn_random_test_bsv32 : ::testing::TestWithParam { auto prim_opt = mvn("mvn_opt", input_info("input_to_target_layout"), params.normalize_variance, 1e-10f, false, params.across_channels); prim_opt.output_paddings = {output_pad}; topo_opt.add(prim_opt); - auto build_opts_opt = build_options(); - build_opts_opt.set_option(build_option::outputs({"mvn_opt", "input_to_target_layout"})); - build_opts_opt.set_option(build_option::force_implementations({ {"mvn_opt", {params.input_format, "mvn_gpu_b_fs_yx_fsv16_imad"}} })); + ExecutionConfig config_opt; + config_opt.set_property(ov::intel_gpu::custom_outputs(std::vector{"mvn_opt", "input_to_target_layout"})); + config_opt.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"mvn_opt", {params.input_format, "mvn_gpu_b_fs_yx_fsv16_imad"}} })); - network net_opt(engine, topo_opt, build_opts_opt); + network net_opt(engine, topo_opt, config_opt); net_opt.set_input_data("input", input); auto outputs_opt = net_opt.execute(); diff --git 
a/src/plugins/intel_gpu/tests/test_cases/non_max_suppression_test.cpp b/src/plugins/intel_gpu/tests/test_cases/non_max_suppression_test.cpp index 3da5b291ce2..978be445557 100644 --- a/src/plugins/intel_gpu/tests/test_cases/non_max_suppression_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/non_max_suppression_test.cpp @@ -143,10 +143,10 @@ TYPED_TEST(non_max_suppression_basic, basic) { topo.add(non_max_suppression("nms", input_info("reformat_boxes"), input_info("reformat_scores"), 6, false, true)); topo.add(reorder("plane_nms", input_info("nms"), format::bfyx, cldnn::data_types::i32)); - build_options bo; - bo.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); - cldnn::network net{engine, topo, bo}; + cldnn::network net{engine, topo, config}; auto boxes_mem = this->get_boxes_memory(engine); auto scores_mem = this->get_scores_memory(engine); @@ -204,10 +204,10 @@ TYPED_TEST(non_max_suppression_basic, num_per_class) { "num_per_class")); topo.add(reorder("plane_nms", input_info("nms"), format::bfyx, cldnn::data_types::i32)); - build_options bo; - bo.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); - cldnn::network net{engine, topo, bo}; + cldnn::network net{engine, topo, config}; auto boxes_mem = this->get_boxes_memory(engine); auto scores_mem = this->get_scores_memory(engine); @@ -275,10 +275,10 @@ TYPED_TEST(non_max_suppression_basic, optional_outputs) { topo.add(reorder("plane_nms", input_info("nms"), format::bfyx, cldnn::data_types::i32)); topo.add(reorder("plane_scores", input_info("selected_scores"), format::bfyx, this->data_type)); - build_options bo; - bo.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); - cldnn::network net{engine, topo, bo}; + cldnn::network net{engine, topo, config}; auto boxes_mem = this->get_boxes_memory(engine); auto scores_mem = this->get_scores_memory(engine); @@ -390,11 +390,11 @@ TYPED_TEST(non_max_suppression_basic, multiple_outputs) { topo.add(reorder("plane_scores", input_info("nms", 1), format::bfyx, this->data_type)); topo.add(reorder("plane_outputs", input_info("nms", 2), format::bfyx, cldnn::data_types::i32)); - build_options bo; - bo.set_option(build_option::optimize_data(true)); - bo.set_option(build_option::allow_new_shape_infer(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); - cldnn::network net{engine, topo, bo}; + cldnn::network net{engine, topo, config}; auto boxes_mem = this->get_boxes_memory(engine); auto scores_mem = this->get_scores_memory(engine); @@ -500,10 +500,10 @@ TYPED_TEST(non_max_suppression_basic, iou_threshold) { "iou_threshold")); topo.add(reorder("plane_nms", input_info("nms"), format::bfyx, cldnn::data_types::i32)); - build_options bo; - bo.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); - cldnn::network net{engine, topo, bo}; + cldnn::network net{engine, topo, config}; auto boxes_mem = this->get_boxes_memory(engine); auto scores_mem = this->get_scores_memory(engine); @@ -557,10 +557,10 @@ TYPED_TEST(non_max_suppression_basic, score_threshold) { "score_threshold")); topo.add(reorder("plane_nms", input_info("nms"), format::bfyx, cldnn::data_types::i32)); - build_options bo; - 
bo.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); - cldnn::network net{engine, topo, bo}; + cldnn::network net{engine, topo, config}; auto boxes_mem = this->get_boxes_memory(engine); auto scores_mem = this->get_scores_memory(engine); @@ -618,10 +618,10 @@ TYPED_TEST(non_max_suppression_basic, soft_nms_sigma) { "soft_nms_sigma")); topo.add(reorder("plane_nms", input_info("nms"), format::bfyx, cldnn::data_types::i32)); - build_options bo; - bo.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); - cldnn::network net{engine, topo, bo}; + cldnn::network net{engine, topo, config}; auto boxes_mem = this->get_boxes_memory(engine); auto scores_mem = this->get_scores_memory(engine); diff --git a/src/plugins/intel_gpu/tests/test_cases/permute_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/permute_gpu_test.cpp index 767523f9518..1b40623fc07 100644 --- a/src/plugins/intel_gpu/tests/test_cases/permute_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/permute_gpu_test.cpp @@ -551,11 +551,11 @@ TEST(permute_fuse_reorder_gpu_f32, basic_b_fs_yx_fsv4_permute_1_8_16_1) reorder("reorder2", input_info("permute"), format::bfyx, data_types::f32), permute("out", input_info("reorder2"), { 0, 3, 1, 2})); - cldnn::build_options options_unfused; - options_unfused.set_option(cldnn::build_option::optimize_data(false)); - options_unfused.set_option(cldnn::build_option::allow_static_input_reorder(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(false)); + config.set_property(ov::intel_gpu::allow_static_input_reorder(true)); - network unfused(engine, topology_unfused, options_unfused); + network unfused(engine, topology_unfused, config); unfused.set_input_data("input", input); // fused network @@ -566,9 +566,9 @@ TEST(permute_fuse_reorder_gpu_f32, basic_b_fs_yx_fsv4_permute_1_8_16_1) reorder("reorder2", input_info("permute"), format::bfyx, data_types::f32), // to be fused to previous permute permute("out", input_info("reorder2"), { 0, 3, 1, 2})); // return to original value - cldnn::build_options options_fused; - options_fused.set_option(cldnn::build_option::optimize_data(true)); - network fused(engine, topology_fused, options_fused); + ExecutionConfig config_fused; + config_fused.set_property(ov::intel_gpu::optimize_data(true)); + network fused(engine, topology_fused, config_fused); fused.set_input_data("input", input); auto outputs_fused = fused.execute(); @@ -1683,22 +1683,22 @@ void TiledPermuteTest::run_test(const std::vector& si ); // run with permute_ref - cldnn::build_options options_ref; - cldnn::implementation_desc permute_ref = { format_fsv, "permute_ref" }; - options_ref.set_option(cldnn::build_option::force_implementations({ {"output", permute_ref} })); + ov::intel_gpu::ExecutionConfig config_ref; + ov::intel_gpu::ImplementationDesc permute_ref = { format_fsv, "permute_ref" }; + config_ref.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"output", permute_ref} })); - cldnn::network network_ref(engine, topology_ref, options_ref); + cldnn::network network_ref(engine, topology_ref, config_ref); network_ref.set_input_data("input", input); auto outputs_ref = network_ref.execute(); auto output_ref = outputs_ref.begin()->second.get_memory(); cldnn::mem_lock output_ref_ptr(output_ref, get_test_stream()); // run with permute_tile_8x8_4x4_fsv16 - cldnn::build_options 
options_tile; - cldnn::implementation_desc permute_tile_8x8_4x4_fsv = { format_fsv, "permute_tile_8x8_4x4_fsv" }; - options_tile.set_option(cldnn::build_option::force_implementations({ {"output", permute_tile_8x8_4x4_fsv} })); + ExecutionConfig config_tile; + ov::intel_gpu::ImplementationDesc permute_tile_8x8_4x4_fsv = { format_fsv, "permute_tile_8x8_4x4_fsv" }; + config_tile.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"output", permute_tile_8x8_4x4_fsv} })); - cldnn::network network_tile(engine, topology_ref, options_tile); + cldnn::network network_tile(engine, topology_ref, config_tile); network_tile.set_input_data("input", input); auto outputs_tile = network_tile.execute(); auto output_tile = outputs_tile.begin()->second.get_memory(); @@ -1866,9 +1866,9 @@ TEST(permute_gpu_f32_dynamic, bfyx_0_2_3_1) { input_layout("input", input_layout_dynamic), permute("permute", input_info("input"), { 0, 2, 3, 1 })); - build_options bo; - bo.set_option(build_option::allow_new_shape_infer(true)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network network(engine, topology, config); network.set_input_data("input", input); auto inst = network.get_primitive("permute"); diff --git a/src/plugins/intel_gpu/tests/test_cases/pooling_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/pooling_gpu_test.cpp index 7d02a365f0a..a81a235805a 100644 --- a/src/plugins/intel_gpu/tests/test_cases/pooling_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/pooling_gpu_test.cpp @@ -300,8 +300,8 @@ TEST(pooling_forward_gpu, basic_max_pooling_int8) { network network( engine, topology, - build_options{ - build_option::outputs({ "reorder2" }) + ExecutionConfig{ + ov::intel_gpu::custom_outputs(std::vector{ "reorder2" }) }); network.set_input_data("input", input_memory); @@ -352,8 +352,8 @@ TEST(pooling_forward_gpu, basic_avg_pooling_int8) { network network( engine, topology, - build_options{ - build_option::outputs({ "reorder2" }) + ExecutionConfig{ + ov::intel_gpu::custom_outputs(std::vector{ "reorder2" }) }); network.set_input_data("input", input_memory); @@ -1934,13 +1934,10 @@ public: } virtual void run_expect(const VVVVVF& expected) { - auto& eng = get_test_engine(); auto topo = build_topology(eng); - auto opts = build_options( - build_option::optimize_data(true) - ); - cldnn::network net(eng, topo, opts); + ExecutionConfig config(ov::intel_gpu::optimize_data(true)); + cldnn::network net(eng, topo, config); auto input_size = tensor(batch(batch_num()), feature(input_features()), spatial(input_x(), input_y(), input_z())); auto input_lay = layout(input_type(), @@ -2336,9 +2333,9 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x8x8_input_2x2_pool_2x2_stride) tested_topology.add(reorder("reorder_pooling", input_info("bsv16_fsv16_pooling"), layout(data_types::f32, format::bfyx, input_tensor))); - build_options op; - op.set_option(build_option::outputs({"bsv16_fsv16_pooling", "reorder_pooling"})); - network bsv16_fsv16_network(engine, tested_topology, op); + ExecutionConfig config; + config.set_property(ov::intel_gpu::custom_outputs(std::vector{"bsv16_fsv16_pooling", "reorder_pooling"})); + network bsv16_fsv16_network(engine, tested_topology, config); bsv16_fsv16_network.set_input_data("input", input_prim); auto outputs = bsv16_fsv16_network.execute(); @@ -2420,9 +2417,9 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x2x2_input_4x4_pool_1x1_stride_1x {stride_size, stride_size}, {y_in_pad, 
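The old build_option::force_implementations / build_option::outputs pair becomes two ExecutionConfig properties: force_implementations taking an ov::intel_gpu::ImplForcingMap of ImplementationDesc entries, and custom_outputs taking a vector of primitive ids. A compact sketch using the mvn case from the hunks further up; the explicit std::vector<std::string> element type and the f32 1x8x2x2 input are assumptions for illustration (primitive_id is a string alias in cldnn), and the kernel/format pair is the one the tests themselves force:

    auto& engine = get_test_engine();
    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 8, 2, 2 } });

    topology topo;
    topo.add(input_layout("input", input->get_layout()));
    topo.add(mvn("mvn", input_info("input"), true, 1e-10f, false, true));

    // Pin the "mvn" primitive to a specific kernel and keep it as an explicit output.
    ov::intel_gpu::ImplementationDesc mvn_impl = { format::bfyx, "mvn_gpu_bfyx_opt" };
    ExecutionConfig config;
    config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"mvn"}));
    config.set_property(ov::intel_gpu::force_implementations(
        ov::intel_gpu::ImplForcingMap{ {"mvn", mvn_impl} }));

    network net(engine, topo, config);
    net.set_input_data("input", input);
    auto outputs = net.execute();

The reference-vs-optimized comparisons (permute_ref vs permute_tile_8x8_4x4_fsv, reorder_data vs the blocked kernels) follow the same shape, with one config per forced kernel and both networks fed the same input.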
x_in_pad})); tested_topology.add(reorder("reorder_pooling", input_info("bsv16_fsv16_pooling"), layout(data_types::f32, format::bfyx, input_tensor))); - build_options op; - op.set_option(build_option::outputs({"bsv16_fsv16_pooling", "reorder_pooling"})); - network bsv16_fsv16_network(engine, tested_topology, op); + ExecutionConfig config; + config.set_property(ov::intel_gpu::custom_outputs(std::vector{"bsv16_fsv16_pooling", "reorder_pooling"})); + network bsv16_fsv16_network(engine, tested_topology, config); bsv16_fsv16_network.set_input_data("input", input_prim); auto outputs = bsv16_fsv16_network.execute(); @@ -2504,9 +2501,9 @@ TEST(pooling_forward_gpu, bsv16_fsv16_avg_16x16x20x20_input_5x5_pool_3x3_stride) tested_topology.add(reorder("reorder_pooling", input_info("bsv16_fsv16_pooling"), layout(data_types::f32, format::bfyx, input_tensor))); - build_options op; - op.set_option(build_option::outputs({"bsv16_fsv16_pooling", "reorder_pooling"})); - network bsv16_fsv16_network(engine, tested_topology, op); + ExecutionConfig config; + config.set_property(ov::intel_gpu::custom_outputs(std::vector{"bsv16_fsv16_pooling", "reorder_pooling"})); + network bsv16_fsv16_network(engine, tested_topology, config); bsv16_fsv16_network.set_input_data("input", input_prim); auto outputs = bsv16_fsv16_network.execute(); @@ -2587,9 +2584,9 @@ TEST(pooling_forward_gpu, bsv16_fsv16_avg_16x16x20x20_input_5x5_pool_3x1_stride) {stride_size_y, stride_size_x}, {y_in_pad, x_in_pad})); tested_topology.add(reorder("reorder_pooling", input_info("bsv16_fsv16_pooling"), layout(data_types::f32, format::bfyx, input_tensor))); - build_options op; - op.set_option(build_option::outputs({"bsv16_fsv16_pooling", "reorder_pooling"})); - network bsv16_fsv16_network(engine, tested_topology, op); + ExecutionConfig config; + config.set_property(ov::intel_gpu::custom_outputs(std::vector{"bsv16_fsv16_pooling", "reorder_pooling"})); + network bsv16_fsv16_network(engine, tested_topology, config); bsv16_fsv16_network.set_input_data("input", input_prim); auto outputs = bsv16_fsv16_network.execute(); @@ -2672,9 +2669,9 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x20x20_input_5x5_pool_3x1_stride) {stride_size_y, stride_size_x}, {y_in_pad, x_in_pad})); tested_topology.add(reorder("reorder_pooling", input_info("bsv16_fsv16_pooling"), layout(data_types::f32, format::bfyx, input_tensor))); - build_options op; - op.set_option(build_option::outputs({"bsv16_fsv16_pooling", "reorder_pooling"})); - network bsv16_fsv16_network(engine, tested_topology, op); + ExecutionConfig config; + config.set_property(ov::intel_gpu::custom_outputs(std::vector{"bsv16_fsv16_pooling", "reorder_pooling"})); + network bsv16_fsv16_network(engine, tested_topology, config); bsv16_fsv16_network.set_input_data("input", input_prim); auto outputs = bsv16_fsv16_network.execute(); @@ -2757,9 +2754,9 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_32x32x20x20_input_5x5_pool_3x1_stride) {stride_size_y, stride_size_x}, {y_in_pad, x_in_pad})); tested_topology.add(reorder("reorder_pooling", input_info("bsv16_fsv16_pooling"), layout(data_types::f32, format::bfyx, input_tensor))); - build_options op; - op.set_option(build_option::outputs({"bsv16_fsv16_pooling", "reorder_pooling"})); - network bsv16_fsv16_network(engine, tested_topology, op); + ExecutionConfig config; + config.set_property(ov::intel_gpu::custom_outputs(std::vector{"bsv16_fsv16_pooling", "reorder_pooling"})); + network bsv16_fsv16_network(engine, tested_topology, config); bsv16_fsv16_network.set_input_data("input", 
input_prim); auto outputs = bsv16_fsv16_network.execute(); @@ -2846,9 +2843,9 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_32x16x20x20_input_5x5_pool_3x1_stride) {stride_size_y, stride_size_x}, {y_in_pad, x_in_pad})); tested_topology.add(reorder("reorder_pooling", input_info("bsv16_fsv16_pooling"), layout(data_types::f32, format::bfyx, input_tensor))); - build_options op; - op.set_option(build_option::outputs({"bsv16_fsv16_pooling", "reorder_pooling"})); - network bsv16_fsv16_network(engine, tested_topology, op); + ExecutionConfig config; + config.set_property(ov::intel_gpu::custom_outputs(std::vector{"bsv16_fsv16_pooling", "reorder_pooling"})); + network bsv16_fsv16_network(engine, tested_topology, config); bsv16_fsv16_network.set_input_data("input", input_prim); auto outputs = bsv16_fsv16_network.execute(); @@ -3202,7 +3199,7 @@ INSTANTIATE_TEST_SUITE_P(DISABLED_POOLING, #ifdef ENABLE_ONEDNN_FOR_GPU TEST(pooling_forward_gpu_onednn, basic_max_pooling_int8) { - auto& engine = get_onednn_test_engine(); + auto& engine = get_test_engine(); if (!engine.get_device_info().supports_immad) return; layout in_layout = { type_to_data_type::value, format::byxf, { 1, 1, 3, 3 } }; @@ -3228,15 +3225,16 @@ TEST(pooling_forward_gpu_onednn, basic_max_pooling_int8) { reorder("reorder2", input_info("pool1"), out_layout) ); - build_options options_target; - options_target.set_option(build_option::outputs({ "reorder2"})); - implementation_desc impl = {format::bfyx, std::string(""), impl_types::onednn}; - options_target.set_option(build_option::force_implementations({{"pool1", impl}})); + ov::intel_gpu::ImplementationDesc impl = {format::bfyx, std::string(""), impl_types::onednn}; + ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), + ov::intel_gpu::custom_outputs(std::vector{ "reorder2" }), + ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"pool1", impl}}), + }; network network( engine, topology, - options_target); + cfg); network.set_input_data("input", input_memory); @@ -3245,8 +3243,7 @@ TEST(pooling_forward_gpu_onednn, basic_max_pooling_int8) { auto interm = outputs.at("reorder2").get_memory(); cldnn::mem_lock interm_ptr(interm, get_test_stream()); unsigned int cntr = 0; - for (const auto& exp : final_results) - { + for (const auto& exp : final_results) { ASSERT_EQ(exp, interm_ptr[cntr++]); } } diff --git a/src/plugins/intel_gpu/tests/test_cases/prior_box_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/prior_box_gpu_test.cpp index 9e21ad3a38d..55f0a1dfb53 100644 --- a/src/plugins/intel_gpu/tests/test_cases/prior_box_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/prior_box_gpu_test.cpp @@ -90,9 +90,9 @@ public: topo.add(prior_box); topo.add(reorder("prior_box", input_info("blocked_prior_box"), plain_format, output_data_type)); - build_options bo; - bo.set_option(build_option::optimize_data(false)); - network network(engine, topo, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(false)); + network network(engine, topo, config); const auto outputs = network.execute(); const auto output = outputs.at("prior_box").get_memory(); diff --git a/src/plugins/intel_gpu/tests/test_cases/propagate_constants_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/propagate_constants_gpu_test.cpp index bddd13637fb..517bb044486 100644 --- a/src/plugins/intel_gpu/tests/test_cases/propagate_constants_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/propagate_constants_gpu_test.cpp @@ -17,8 +17,8 @@ using namespace ::tests; //This 
situation should be handled properly by propagate constants optimization phase TEST(propagate_constants, copy_dependecies_from_nodes) { auto& engine = get_test_engine(); - build_options build_opt; - build_opt.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); auto weights1 = engine.allocate_memory({ data_types::f16, format::yxfb,{ 1, 1, 2, 1 } }); @@ -37,7 +37,7 @@ TEST(propagate_constants, copy_dependecies_from_nodes) { topology.add(reorder("reorder1", input_info("reshape1"), layout(data_types::f32, format::byxf, tensor(4)))); topology.add(concatenation("concat", { input_info("reorder1"), input_info("weights2") }, 3)); topology.add(convolution("conv2", { input_info("reorder2") }, { "concat" })); - network network(engine, topology, build_opt); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/quantize_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/quantize_gpu_test.cpp index 518799ed060..9312cd0ce1a 100644 --- a/src/plugins/intel_gpu/tests/test_cases/quantize_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/quantize_gpu_test.cpp @@ -211,9 +211,9 @@ TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_1_ch8_binary_pack) reorder("reorder", input_info("quantize"), layout{data_types::f32, format::bfyx, tensor{1,8,2,2}}) ); - build_options bo; - bo.set_option(build_option::optimize_data(true)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -735,10 +735,10 @@ struct quantize_random_test : testing::TestWithParam{"quantize"})); - network net(engine, topo, build_ops); + network net(engine, topo, config); net.set_input_data("input", input); auto result = net.execute(); @@ -773,9 +773,8 @@ struct quantize_random_test : testing::TestWithParamget_layout())); topology.add(input_layout("min_val", min_val->get_layout())); topology.add(input_layout("max_val", max_val->get_layout())); - build_options bo; - bo.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); - cldnn::network net{engine, topology, bo}; + cldnn::network net{engine, topology, config}; net.set_input_data("shape", shape); net.set_input_data("min_val", min_val); diff --git a/src/plugins/intel_gpu/tests/test_cases/range_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/range_gpu_test.cpp index b240dee66de..1eb72f325af 100644 --- a/src/plugins/intel_gpu/tests/test_cases/range_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/range_gpu_test.cpp @@ -42,10 +42,9 @@ struct RangeArgs { step.addTo(topology); topology.add(range { "range", { input_info(start.name), input_info(stop.name), input_info(step.name) }, { dt, format::bfyx, tensor{batch(outLen)} } }); - build_options bo; - bo.set_option(build_option::allow_new_shape_infer(use_new_shape_infer)); + ExecutionConfig config(ov::intel_gpu::allow_new_shape_infer(use_new_shape_infer)); - network network { tests::get_test_engine(), topology, bo }; + network network { tests::get_test_engine(), topology, config }; start.setData(network); stop.setData(network); @@ -208,10 +207,10 @@ TEST(range_gpu_test, 
range_with_select) { set_values(input0, {start_val}); set_values(input2, {step_val}); - build_options bo; - bo.set_option(build_option::allow_new_shape_infer(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); - network network { tests::get_test_engine(), topology, bo }; + network network { tests::get_test_engine(), topology, config }; auto outputs = network.execute(); auto output = outputs.at("range").get_memory(); diff --git a/src/plugins/intel_gpu/tests/test_cases/reduce_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/reduce_gpu_test.cpp index c9b273192eb..a5452f44909 100644 --- a/src/plugins/intel_gpu/tests/test_cases/reduce_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/reduce_gpu_test.cpp @@ -525,11 +525,11 @@ public: } topology.add(input_layout("input", input_mem->get_layout())); topology.add(red); - build_options options; - options.set_option(build_option::optimize_data(true)); - implementation_desc reduce_impl = {input_format, kernel_name}; - options.set_option(build_option::force_implementations({{"reduce", reduce_impl}})); - network network(engine, topology, options); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + ov::intel_gpu::ImplementationDesc reduce_impl = {input_format, kernel_name}; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"reduce", reduce_impl}})); + network network(engine, topology, config); network.set_input_data("input", input_mem); network.execute(); @@ -1739,11 +1739,11 @@ public: } topology.add(input_layout("input", input_mem->get_layout())); topology.add(red); - build_options options; - options.set_option(build_option::optimize_data(true)); - implementation_desc reduce_impl = {input_format, kernel_name}; - options.set_option(build_option::force_implementations({{"reduce", reduce_impl}})); - network network(engine, topology, options); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + ov::intel_gpu::ImplementationDesc reduce_impl = {input_format, kernel_name}; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"reduce", reduce_impl}})); + network network(engine, topology, config); network.set_input_data("input", input_mem); network.execute(); @@ -1816,7 +1816,7 @@ INSTANTIATE_TEST_SUITE_P(reduce_gpu_b_fs_yx_fsv16_xy_i8, template class ReduceOnednnTestBase : public ::testing::TestWithParam { protected: - cldnn::engine& engine = get_onednn_test_engine(); + cldnn::engine& engine = get_test_engine(); int batch_num, input_f, input_w, input_z, input_y, input_x; cldnn::format input_format = format::any; cldnn::reduce_mode reduce_mode; @@ -1893,11 +1893,12 @@ public: } topology.add(input_layout("input", input_mem->get_layout())); topology.add(red); - build_options options; - options.set_option(build_option::optimize_data(true)); - implementation_desc reduce_impl = {input_format, kernel_name, impl_types::onednn}; - options.set_option(build_option::force_implementations({{"reduce", reduce_impl}})); - network network(engine, topology, options); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + ov::intel_gpu::ImplementationDesc reduce_impl = {input_format, kernel_name, impl_types::onednn}; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"reduce", reduce_impl}})); + config.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); + network network(engine, topology, 
config); network.set_input_data("input", input_mem); network.execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/removing_output_node_test.cpp b/src/plugins/intel_gpu/tests/test_cases/removing_output_node_test.cpp index 7b08ef00be1..cc6be37a99b 100644 --- a/src/plugins/intel_gpu/tests/test_cases/removing_output_node_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/removing_output_node_test.cpp @@ -62,10 +62,10 @@ TEST(removing_output_node, multiple_outputs) { std::vector out_vec = { 0.0f, 3.0f, 1.0f, 4.0f, 2.0f, 5.0f }; set_values(input, input_vec); - build_options bo; - bo.set_option(build_option::outputs({ "shuffle_channels", "reshape", "strided_slice" })); + ExecutionConfig config; + config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "shuffle_channels", "reshape", "strided_slice" })); - network network(engine, topology, bo); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/reorder_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/reorder_gpu_test.cpp index 8fc5e7ee851..339e33aa2c8 100644 --- a/src/plugins/intel_gpu/tests/test_cases/reorder_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/reorder_gpu_test.cpp @@ -83,11 +83,11 @@ static void compare_bfyx2blocked_with_ref(const std::string& kernel_name, reorder("reorder", input_info("input"), output_layout)); // run on reference(reorder_data) kernel - cldnn::build_options options_ref; - cldnn::implementation_desc reorder_ref = { output_format, "reorder_data" }; - options_ref.set_option(cldnn::build_option::force_implementations({ {"reorder", reorder_ref} })); + ov::intel_gpu::ExecutionConfig config_ref; + ov::intel_gpu::ImplementationDesc reorder_ref = { output_format, "reorder_data" }; + config_ref.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"reorder", reorder_ref} })); - network network_ref(engine, topology, options_ref); + network network_ref(engine, topology, config_ref); network_ref.set_input_data("input", input); auto outputs_ref = network_ref.execute(); @@ -95,11 +95,11 @@ static void compare_bfyx2blocked_with_ref(const std::string& kernel_name, e1->wait(); // run on optimized kernel - cldnn::build_options options; - cldnn::implementation_desc reorder_optimized = { output_format, kernel_name }; - options.set_option(cldnn::build_option::force_implementations({ {"reorder", reorder_optimized} })); + ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ImplementationDesc reorder_optimized = { output_format, kernel_name }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"reorder", reorder_optimized} })); - network network(engine, topology, options); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -788,8 +788,8 @@ TEST(reorder_gpu, basic_convert_f16_f32_f16) { network network( engine, topology, - build_options{ - build_option::outputs({"reorder_f16_f32", "reorder_f32_f16"}) + ExecutionConfig{ + ov::intel_gpu::custom_outputs(std::vector{"reorder_f16_f32", "reorder_f32_f16"}) }); network.set_input_data("input", input); @@ -859,8 +859,8 @@ TEST(reorder_gpu, basic_convert_int8) { network network( engine, topology, - build_options{ - build_option::outputs({ "reorder_input", "reorder2"}) + ExecutionConfig{ + ov::intel_gpu::custom_outputs(std::vector{ "reorder_input", "reorder2"}) }); network.set_input_data("input", input_memory); 
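The oneDNN-specific tests (pooling_forward_gpu_onednn, ReduceOnednnTestBase, reorder_onednn_gpu) no longer need a dedicated get_onednn_test_engine(): the regular test engine is combined with an in-order queue and a forced onednn implementation through ExecutionConfig. A condensed sketch of that recipe, reusing the helper and primitive names ("pool1", topology) from the hunks above rather than redefining them:

    auto& engine = get_test_engine();
    if (!engine.get_device_info().supports_immad)   // skip on devices without onednn support
        return;

    // Force the primitive onto the onednn implementation and run on an in-order queue.
    ov::intel_gpu::ImplementationDesc impl = { format::bfyx, std::string(""), impl_types::onednn };
    ExecutionConfig cfg{ ov::intel_gpu::queue_type(QueueTypes::in_order),
                         ov::intel_gpu::optimize_data(true),
                         ov::intel_gpu::force_implementations(
                             ov::intel_gpu::ImplForcingMap{ {"pool1", impl} }) };

    network network(engine, topology, cfg);

Note that ExecutionConfig also accepts a brace-enclosed list of properties directly, which the onednn hunks use instead of repeated set_property calls.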
@@ -877,7 +877,6 @@ TEST(reorder_gpu, basic_convert_int8) { } TEST(reorder_gpu, basic_convert_uint8) { - auto& engine = get_test_engine(); layout in_layout = { type_to_data_type::value,format::byxf,{ 1, 1, 3, 3 } }; layout byte_layout = { type_to_data_type::value, format::bfyx,{ 1, 1, 3, 3 } }; @@ -907,8 +906,8 @@ TEST(reorder_gpu, basic_convert_uint8) { network network( engine, topology, - build_options{ - build_option::outputs({ "reorder_input", "reorder2"}) + ExecutionConfig{ + ov::intel_gpu::custom_outputs(std::vector{ "reorder_input", "reorder2" }) }); network.set_input_data("input", input_memory); @@ -989,8 +988,8 @@ TEST(reorder_gpu, basic_convert_uint8rgbabyxf_to_fp32_bfyx) { network network( engine, topology, - build_options{ - build_option::outputs({ "reorder_input", "crop" }) + ExecutionConfig{ + ov::intel_gpu::custom_outputs(std::vector{ "reorder_input", "crop" }) }); network.set_input_data("input", input_memory); @@ -1289,10 +1288,10 @@ TEST(reorder_gpu_f32, dynamic_bfyx_to_bfzyx) { input_layout("input", in_layout), reorder("reorder", input_info("input"), format::bfzyx, data_types::f32)); - build_options options; - options.set_option(build_option::optimize_data(true)); - options.set_option(build_option::allow_new_shape_infer(true)); - network network(engine, topology, options); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network network(engine, topology, config); auto inst = network.get_primitive("reorder"); auto impl = inst->get_impl(); @@ -1494,10 +1493,10 @@ TEST(reorder_gpu_opt, basic_remove_redundant) reorder("r2", input_info("r1"), format::yxfb, data_types::f32) }; - build_options opts; - opts.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); - network net(engine, tpl, opts); + network net(engine, tpl, config); net.set_input_data("in", in); auto outputs = net.execute(); auto executed_primitives = net.get_executed_primitives(); @@ -1523,10 +1522,10 @@ TEST(reorder_gpu_opt, remove_redundant_activation_fuse) eltwise("output", { input_info("relu"), input_info("scale_data") }, eltwise_mode::prod) }; - build_options opts; - opts.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); - network net(engine, tpl, opts); + network net(engine, tpl, config); net.set_input_data("in", in); auto outputs = net.execute(); cldnn::mem_lock out_ptr(outputs.begin()->second.get_memory(), get_test_stream()); @@ -1547,13 +1546,13 @@ TEST(reorder_gpu_opt, basic_remove_redundant_output_due_to_implicit_reorders) reorder("r1", input_info("conv"), format::bfyx, data_types::f32) //optimize data should add conversion from yxfb to bfyx and 'conv' should output data in bfyx as well (IE case) }; - build_options opts; + ExecutionConfig config; //we need to check if r1 will be successfully opimized and still we should be able to query for r1's output which should point to conv's output (note conv cannot be marked as output in this case) - opts.set_option(build_option::outputs({ "r1" })); - opts.set_option(build_option::optimize_data(true)); + config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "r1" })); + config.set_property(ov::intel_gpu::optimize_data(true)); - network net(engine, tpl, opts); + network net(engine, tpl, config); net.set_input_data("in", in); auto outputs = net.execute(); @@ -1576,10 +1575,10 @@ 
TEST(reorder_gpu_opt, basic_remove_redundant_due_to_implicit_reorders) softmax("output", input_info("r1")) }; - build_options opts; - opts.set_option(build_option::optimize_data(true)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); - network net(engine, tpl, opts); + network net(engine, tpl, config); net.set_input_data("in", in); auto outputs = net.execute(); auto executed_primitives = net.get_executed_primitives(); @@ -1601,11 +1600,11 @@ TEST(reorder_gpu_opt, non_trivial_remove_redundant) reorder("r1", input_info("in"), format::bfyx, data_types::f32) }; - build_options opts; + ExecutionConfig config; - opts.set_option(build_option::optimize_data(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); - network net(engine, tpl, opts); + network net(engine, tpl, config); net.set_input_data("in", in); auto outputs = net.execute(); auto executed_primitives = net.get_executed_primitives(); @@ -1639,9 +1638,9 @@ TEST(reorder_gpu_opt, mean_mul) }; float answers[] = { 0.5f, 5.0f, -15.0f, 17.2f, 6.0f, -21.0f }; - build_options opts; - opts.set_option(build_option::optimize_data(true)); - network net(engine, tpl, opts); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network net(engine, tpl, config); net.set_input_data("in", in); auto outputs = net.execute(); @@ -1674,9 +1673,9 @@ TEST(reorder_gpu_opt, mean_div) }; float answers[] = { 2.0f, 1.0f, -1.0f, 0.5f, 4.0f, -2.0f }; - build_options opts; - opts.set_option(build_option::optimize_data(true)); - network net(engine, tpl, opts); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network net(engine, tpl, config); net.set_input_data("in", in); auto outputs = net.execute(); @@ -1705,9 +1704,9 @@ TEST(reorder_gpu_opt, mean_mul_val) }; float answers[] = { 2.0f, 4.0f, 1.5f, 2.0f, 50.0f, 600.0f }; - build_options opts; - opts.set_option(build_option::optimize_data(true)); - network net(engine, tpl, opts); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network net(engine, tpl, config); net.set_input_data("in", in); auto outputs = net.execute(); @@ -1735,9 +1734,9 @@ TEST(reorder_gpu_opt, mean_mul_val_float_to_int) }; char answers[] = { 0, 2, 1, 2, 25, 127 }; - build_options opts; - opts.set_option(build_option::optimize_data(true)); - network net(engine, tpl, opts); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network net(engine, tpl, config); net.set_input_data("in", in); auto outputs = net.execute(); @@ -1834,8 +1833,8 @@ TEST(reorder_gpu_binary, binary_output) { auto& engine = get_test_engine(); - cldnn::build_options options; - options.set_option(cldnn::build_option::optimize_data(true)); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); layout output_layout(data_types::bin, format::b_fs_yx_32fp, { 2, 2, 2, 2 }); @@ -1882,8 +1881,8 @@ TEST(reorder_gpu_binary, binary_input) { auto& engine = get_test_engine(); - cldnn::build_options options; - options.set_option(cldnn::build_option::optimize_data(true)); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::bin, format::b_fs_yx_32fp,{ 2, 2, 2, 2 } }); layout output_layout(data_types::f32, format::bfyx, { 2, 2, 2, 2 }); @@ -1974,9 +1973,9 @@ TEST(reorder_gpu_f32, 
bfwzyx_bfyx_chain) reorder("reorder3", input_info("reshape3"), format::bfyx, data_types::f32, sub_bfyx), reorder("out_reorder", input_info("reorder3"), format::bfwzyx, data_types::f32) ); - build_options bo; - bo.set_option(build_option::optimize_data(true)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2176,9 +2175,9 @@ TEST(reorder_gpu_f32, b_fs_yx_fsv16_to_bfyx_opt_allowed) reorder(reorder_name, input_info("first_activation"), format::bfyx, data_types::f32), activation("second_activation", input_info(reorder_name), activation_func::abs)); - build_options bo; - bo.set_option(build_option::optimize_data(true)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2223,9 +2222,9 @@ TEST(reorder_gpu_f32, b_fs_yx_fsv16_to_bfyx_opt_not_allowed) reorder(reorder_name, input_info("input"), format::bfyx, data_types::f32), convolution("convolution", input_info(reorder_name), {"weights"}, { 1, 1 }, { 1, 1 }, { 1, 1 })); - build_options bo; - bo.set_option(build_option::optimize_data(true)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2279,9 +2278,9 @@ TEST(reorder_gpu_f32, b_fs_yx_fsv16_to_bfyx_opt_padded) reorder(reorder_name, input_info("input"), format::bfyx, data_types::f32), activation("activation", input_info(reorder_name), activation_func::abs)); - build_options bo; - bo.set_option(build_option::optimize_data(true)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2569,20 +2568,16 @@ struct reorder_test_param { template class ReorderTest : public ::testing::TestWithParam { public: -#ifdef ENABLE_ONEDNN_FOR_GPU - cldnn::engine& engine = get_onednn_test_engine(); -#else cldnn::engine& engine = get_test_engine(); -#endif cldnn::topology topology_test; - cldnn::build_options bo_test; + ExecutionConfig config; static const int min_random = -200; static const int max_random = 200; std::vector executed_prims; void execute(T& p) { auto input_prim = this->get_mem(get_input_layout(p)); - network network_test(this->engine, this->topology_test, this->bo_test); + network network_test(this->engine, this->topology_test, this->config); network_test.set_input_data("input", input_prim); executed_prims = network_test.get_executed_primitive_ids(); @@ -2601,11 +2596,11 @@ public: } void SetUp() override { - bo_test.set_option(build_option::optimize_data(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); } - void setup_with_build_ops(cldnn::build_options& build_ops) { - bo_test = build_ops; + void setup_with_build_ops(const ExecutionConfig& c) { + config = c; } cldnn::memory::ptr get_mem(cldnn::layout l) { @@ -2710,12 +2705,13 @@ TEST_P(testing_removal_reorder, removal_no_padded_reorder) { reorder("reorder_output", input_info("conv_output"), p.default_format, data_types::f32) ); - auto build_opts = 
build_options(); - build_opts.set_option(build_option::optimize_data(true)); - implementation_desc impl = { format::b_fs_yx_fsv16, std::string(""), impl_types::ocl }; - build_opts.set_option(build_option::force_implementations({ {"conv_output", impl} })); + ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, std::string(""), impl_types::ocl }; + ExecutionConfig config{ov::intel_gpu::queue_type(QueueTypes::in_order), + ov::intel_gpu::optimize_data(true), + ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv_output", impl} }) + }; - setup_with_build_ops(build_opts); + setup_with_build_ops(config); execute(p); @@ -2739,12 +2735,13 @@ TEST_P(testing_removal_reorder, removal_padded_reorder) { reorder("reorder_output", input_info("conv_output"), p.default_format, data_types::f32) ); - auto build_opts = build_options(); - build_opts.set_option(build_option::optimize_data(true)); - implementation_desc impl = { format::b_fs_yx_fsv16, std::string(""), impl_types::ocl }; - build_opts.set_option(build_option::force_implementations({ {"conv_output", impl} })); + ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, std::string(""), impl_types::ocl }; + ExecutionConfig config{ov::intel_gpu::queue_type(QueueTypes::in_order), + ov::intel_gpu::optimize_data(true), + ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv_output", impl} }) + }; - setup_with_build_ops(build_opts); + setup_with_build_ops(config); execute(p); @@ -2761,7 +2758,7 @@ INSTANTIATE_TEST_SUITE_P(reorder_gpu_testing, testing_removal_reorder, #ifdef ENABLE_ONEDNN_FOR_GPU TEST(reorder_onednn_gpu, basic_convert_int8) { - auto& engine = get_onednn_test_engine(); + auto& engine = get_test_engine(); if (!engine.get_device_info().supports_immad) return; layout in_layout = { type_to_data_type::value, format::byxf, { 1, 1, 3, 3 } }; @@ -2789,15 +2786,16 @@ TEST(reorder_onednn_gpu, basic_convert_int8) { reorder("reorder2", input_info("reorder_input"), in_layout) ); - build_options options_target; - options_target.set_option(build_option::outputs({ "reorder_input", "reorder2"})); - implementation_desc impl = { format::bfyx, std::string(""), impl_types::onednn }; - options_target.set_option(build_option::force_implementations({{ "reorder_input", impl }})); + ov::intel_gpu::ImplementationDesc impl = { format::bfyx, std::string(""), impl_types::onednn }; + ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), + ov::intel_gpu::custom_outputs(std::vector{ "reorder_input", "reorder2"}), + ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{ "reorder_input", impl }}), + }; network network( engine, topology, - options_target); + cfg); network.set_input_data("input", input_memory); diff --git a/src/plugins/intel_gpu/tests/test_cases/resample_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/resample_gpu_test.cpp index 85bd31b1b90..f166784c648 100644 --- a/src/plugins/intel_gpu/tests/test_cases/resample_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/resample_gpu_test.cpp @@ -466,10 +466,8 @@ struct resample_random_test : testing::TestWithParam{"resample"})); + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"resample", {params.in_format, "resample_ref"}} })); - cldnn::network net(engine, topo, build_opts); + cldnn::network net(engine, topo, config); net.set_input_data("in", in_mem); auto result = net.execute(); @@ -645,11 +643,11 @@ struct caffe_resample_random_test : 
testing::TestWithParam{"resample_opt"})); + config_opt.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"resample_opt", {params.in_format, "resample_opt"}} })); - cldnn::network net_opt(engine, topo_opt, build_opts_opt); + cldnn::network net_opt(engine, topo_opt, config_opt); // Use in_mem from ref network net_opt.set_input_data("in", in_mem); @@ -721,8 +719,8 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest1) { // Sample Type: Nearest auto& engine = get_test_engine(); - cldnn::build_options options; - options.set_option(cldnn::build_option::allow_new_shape_infer(true)); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; int f = 2; @@ -754,7 +752,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest1) { 21.f, 22.f, 23.f, }); - cldnn::network net{ engine, topology, options }; + cldnn::network net{ engine, topology, config }; net.set_input_data("input", input); @@ -811,8 +809,8 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest2) { // Sample Type: Nearest auto& engine = get_test_engine(); - cldnn::build_options options; - options.set_option(cldnn::build_option::allow_new_shape_infer(true)); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; int f = 2; @@ -844,7 +842,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest2) { 21.f, 22.f, 23.f, }); - cldnn::network net{ engine, topology, options }; + cldnn::network net{ engine, topology, config }; net.set_input_data("input", input); @@ -901,8 +899,8 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest3) { // Sample Type: Nearest auto& engine = get_test_engine(); - cldnn::build_options options; - options.set_option(cldnn::build_option::allow_new_shape_infer(true)); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; int f = 2; @@ -934,7 +932,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest3) { 21.f, 22.f, 23.f, }); - cldnn::network net{ engine, topology, options }; + cldnn::network net{ engine, topology, config }; net.set_input_data("input", input); @@ -991,8 +989,8 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest4) { // Sample Type: Nearest auto& engine = get_test_engine(); - cldnn::build_options options; - options.set_option(cldnn::build_option::allow_new_shape_infer(true)); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; int f = 2; @@ -1024,7 +1022,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest4) { 21.f, 22.f, 23.f, }); - cldnn::network net{ engine, topology, options }; + cldnn::network net{ engine, topology, config }; net.set_input_data("input", input); @@ -1081,8 +1079,8 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest5) { // Sample Type: Nearest auto& engine = get_test_engine(); - cldnn::build_options options; - options.set_option(cldnn::build_option::allow_new_shape_infer(true)); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; int f = 2; @@ -1114,7 +1112,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest5) { 21.f, 22.f, 23.f, }); - cldnn::network net{ engine, topology, options }; + cldnn::network net{ engine, topology, config }; net.set_input_data("input", input); @@ -1171,8 +1169,8 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode1) { // Sample Type: Nearest auto& engine = get_test_engine(); - cldnn::build_options options; - 
options.set_option(cldnn::build_option::allow_new_shape_infer(true)); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; int f = 2; @@ -1206,7 +1204,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode1) { 21.f, 22.f, 23.f, }); - cldnn::network net{ engine, topology, options }; + cldnn::network net{ engine, topology, config }; net.set_input_data("input", input); @@ -1241,8 +1239,8 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode2) { // Sample Type: Nearest auto& engine = get_test_engine(); - cldnn::build_options options; - options.set_option(cldnn::build_option::allow_new_shape_infer(true)); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; int f = 2; @@ -1276,7 +1274,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode2) { 21.f, 22.f, 23.f, }); - cldnn::network net{ engine, topology, options }; + cldnn::network net{ engine, topology, config }; net.set_input_data("input", input); @@ -1305,8 +1303,8 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode3) { // Sample Type: Nearest auto& engine = get_test_engine(); - cldnn::build_options options; - options.set_option(cldnn::build_option::allow_new_shape_infer(true)); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; int f = 2; @@ -1340,7 +1338,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode3) { 21.f, 22.f, 23.f, }); - cldnn::network net{ engine, topology, options }; + cldnn::network net{ engine, topology, config }; net.set_input_data("input", input); @@ -1375,8 +1373,8 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode4) { // Sample Type: Nearest auto& engine = get_test_engine(); - cldnn::build_options options; - options.set_option(cldnn::build_option::allow_new_shape_infer(true)); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; int f = 2; @@ -1410,7 +1408,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode4) { 21.f, 22.f, 23.f, }); - cldnn::network net{ engine, topology, options }; + cldnn::network net{ engine, topology, config }; net.set_input_data("input", input); @@ -1445,8 +1443,8 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode5) { // Sample Type: Nearest auto& engine = get_test_engine(); - cldnn::build_options options; - options.set_option(cldnn::build_option::allow_new_shape_infer(true)); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; int f = 2; @@ -1480,7 +1478,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode5) { 21.f, 22.f, 23.f, }); - cldnn::network net{ engine, topology, options }; + cldnn::network net{ engine, topology, config }; net.set_input_data("input", input); @@ -1515,8 +1513,8 @@ TEST(resample_gpu, interpolate_in2x2x3x2_cubic) { // Sample Type: Nearest auto& engine = get_test_engine(); - cldnn::build_options options; - options.set_option(cldnn::build_option::allow_new_shape_infer(true)); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; int f = 2; @@ -1548,7 +1546,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_cubic) { 21.f, 22.f, 23.f, }); - cldnn::network net{ engine, topology, options }; + cldnn::network net{ engine, topology, config }; net.set_input_data("input", input); @@ 
-1583,8 +1581,8 @@ TEST(resample_gpu, interpolate_in2x2x3x2_cubic2) { // Sample Type: Nearest auto& engine = get_test_engine(); - cldnn::build_options options; - options.set_option(cldnn::build_option::allow_new_shape_infer(true)); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 1; int f = 1; @@ -1609,7 +1607,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_cubic2) { 3.f, 4.f, 5.f, }); - cldnn::network net{ engine, topology, options }; + cldnn::network net{ engine, topology, config }; net.set_input_data("input", input); @@ -1636,8 +1634,8 @@ TEST(resample_gpu, interpolate_in2x2x3x2_linear) { // Sample Type: Nearest auto& engine = get_test_engine(); - cldnn::build_options options; - options.set_option(cldnn::build_option::allow_new_shape_infer(true)); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; int f = 2; @@ -1669,7 +1667,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_linear) { 21.f, 22.f, 23.f, }); - cldnn::network net{ engine, topology, options }; + cldnn::network net{ engine, topology, config }; net.set_input_data("input", input); @@ -1863,8 +1861,8 @@ TEST(resample_gpu, interpolate_in1x1x2x4_linear_scale) { // Sample Type: Linear auto& engine = get_test_engine(); - cldnn::build_options options; - options.set_option(cldnn::build_option::allow_new_shape_infer(true)); + ov::intel_gpu::ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 1; int f = 1; @@ -1891,7 +1889,7 @@ TEST(resample_gpu, interpolate_in1x1x2x4_linear_scale) { 5.f, 6.f, 7.f, 8.f, }); - cldnn::network net{ engine, topology, options }; + cldnn::network net{ engine, topology, config }; net.set_input_data("input", input); @@ -2022,10 +2020,10 @@ struct resample_opt_random_test : testing::TestWithParam{"resample"})); - network net(engine, topo, build_opts); + network net(engine, topo, config); net.set_input_data("in", in_mem); // first execution of ref @@ -2041,11 +2039,11 @@ struct resample_opt_random_test : testing::TestWithParam{"resample_opt", "res_to_bfyx"})); + config_opt.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"resample_opt", {params.in_format, kernel}} })); - network net_opt(engine, topo_opt, build_opts_opt); + network net_opt(engine, topo_opt, config_opt); // Use in_mem from ref network net_opt.set_input_data("in", in_mem); @@ -2115,19 +2113,8 @@ struct resample_opt_random_test_ext : resample_opt_random_test std::cout << std::endl; } - cldnn::engine_configuration get_profiling_config() { - //const bool enable_profiling = true; - std::string sources_dumps_dir = ""; - cldnn::queue_types queue_type = cldnn::queue_types::out_of_order; - priority_mode_types priority_mode = priority_mode_types::disabled; - throttle_mode_types throttle_mode = throttle_mode_types::disabled; - bool use_memory_pool = true; - bool use_unified_shared_memory = true; - return engine_configuration(true, queue_type, sources_dumps_dir, priority_mode, throttle_mode, use_memory_pool, use_unified_shared_memory); - } - void execute_perf_test(const resample_opt_random_test_params& params, const std::string& kernel, const bool do_planar = false) { - auto& engine = get_test_engine(get_profiling_config()); + auto& engine = get_test_engine(); const format origin_format = format::dimension(params.in_format) == 4 ? 
format::bfyx : format::bfzyx; auto in_layout = layout(params.input_type, origin_format, params.input_size); @@ -2145,13 +2132,12 @@ struct resample_opt_random_test_ext : resample_opt_random_test topo_opt.add(prim_opt); topo_opt.add(reorder("res_to_bfyx", input_info("resample_opt"), origin_format, params.input_type)); - auto build_opts_opt = build_options(); - build_opts_opt.set_option(build_option::outputs({"res_to_bfyx"})); - build_opts_opt.set_option(build_option::force_implementations({ {"resample_opt", {working_format, kernel}} })); - build_opts_opt.set_option(build_option::debug(true)); - // optimize_data is turned on to test cross-layout + ExecutionConfig cfg{ov::enable_profiling(true), + ov::intel_gpu::custom_outputs(std::vector{"res_to_bfyx"}), + ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"resample_opt", {working_format, kernel}} }) + }; - network net_opt(engine, topo_opt, build_opts_opt); + network net_opt(engine, topo_opt, cfg); // Use in_mem from ref network net_opt.set_input_data("in", in_mem); diff --git a/src/plugins/intel_gpu/tests/test_cases/reshape_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/reshape_gpu_test.cpp index 8858c3a99f3..a6ab1600b90 100644 --- a/src/plugins/intel_gpu/tests/test_cases/reshape_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/reshape_gpu_test.cpp @@ -65,10 +65,10 @@ void generic_reshape_test(format fmt, tensor const& input_size, tensor const& re } tpl.add(reshape("reshape", reshape_input, reshape_size, cldnn::reshape::reshape_mode::base, output_padd)); - build_options bo; - bo.set_option(build_option::outputs({reshape_input, "reshape"})); + ExecutionConfig config; + config.set_property(ov::intel_gpu::custom_outputs(std::vector{reshape_input, "reshape"})); - network net(engine, tpl, bo); + network net(engine, tpl, config); net.set_input_data("input", input); auto outputs = net.execute(); @@ -613,9 +613,9 @@ TEST(reshape_gpu_f32, shrink_chain_partial) { std::vector out = {5.f, 12.f, 15.f, 32.0f}; set_values(input, input_vec); - build_options bo; - bo.set_option(build_option::optimize_data(true)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -653,9 +653,9 @@ TEST(reshape_gpu_f32, shrink_chain_full) { std::vector out = {5.f, 12.f, 15.f, 32.0f}; set_values(input, input_vec); - build_options bo; - bo.set_option(build_option::optimize_data(true)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -688,9 +688,9 @@ TEST(reshape_gpu_f32, shrink_chain_out) { std::vector out = {0.f, 2.f, 0.f, 4.0f}; set_values(input, input_vec); - build_options bo; - bo.set_option(build_option::optimize_data(true)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -727,9 +727,9 @@ TEST(reshape_gpu_f32, basic_runtime_static_shape) { set_values(input, input_data); - build_options bo; - bo.set_option(build_option::allow_new_shape_infer(true)); - network network(engine, topology, bo); + ExecutionConfig config; + 
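Engine-level profiling also moves into the config: the perf-test hunk above drops get_profiling_config() and its hand-built engine_configuration, and instead requests profiling per network through the standard ov::enable_profiling property. Roughly, reusing the names (topo_opt, working_format, kernel, in_mem) from that hunk:

    // Old: get_test_engine(get_profiling_config()) with a seven-argument engine_configuration.
    // New: the plain test engine plus a profiling-enabled ExecutionConfig.
    ExecutionConfig cfg{ ov::enable_profiling(true),
                         ov::intel_gpu::custom_outputs(std::vector<std::string>{"res_to_bfyx"}),
                         ov::intel_gpu::force_implementations(
                             ov::intel_gpu::ImplForcingMap{ {"resample_opt", {working_format, kernel}} }) };

    network net_opt(engine, topo_opt, cfg);
    net_opt.set_input_data("in", in_mem);
    auto result = net_opt.execute();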
config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -775,10 +775,10 @@ TEST(reshape_gpu_f32, basic_runtime_dynamic_shape) { set_values(input, input_data); - build_options bo; - bo.set_option(build_option::allow_new_shape_infer(true)); - bo.set_option(build_option::optimize_data(true)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -826,10 +826,10 @@ TEST(reshape_gpu_f32, basic_runtime_dynamic_shape_with_const) { set_values(input, input_data); - build_options bo; - bo.set_option(build_option::allow_new_shape_infer(true)); - bo.set_option(build_option::optimize_data(true)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -883,10 +883,10 @@ TEST(reshape_gpu_f32, basic_runtime_dynamic_shape_with_const_optimized_out) { set_values(input, input_data); - build_options bo; - bo.set_option(build_option::allow_new_shape_infer(true)); - bo.set_option(build_option::optimize_data(true)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/set_output_memory_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/set_output_memory_gpu_test.cpp index e8e5b221928..ae3e0e8ec19 100644 --- a/src/plugins/intel_gpu/tests/test_cases/set_output_memory_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/set_output_memory_gpu_test.cpp @@ -317,9 +317,9 @@ TEST(set_output_memory_gpu, basic_opt) { primitive_id outputID = "reorder3"; topology.add(reorder(outputID, input_info("concat"), ol)); - build_options bo; - bo.set_option(build_option::optimize_data(true)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -369,7 +369,7 @@ TEST(set_output_memory_gpu, mutable_output_data) { /*b1f3*/4.f, 0.5f, 8.f, 8.2f }; set_values(input, input_vec); - auto prog = program::build_program(engine, topology, build_options()); + auto prog = program::build_program(engine, topology, ExecutionConfig{}); network network(prog, 0); network.set_input_data("Add_1396", input); diff --git a/src/plugins/intel_gpu/tests/test_cases/shape_of_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/shape_of_gpu_test.cpp index 679f54bd10a..576b065ef68 100644 --- a/src/plugins/intel_gpu/tests/test_cases/shape_of_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/shape_of_gpu_test.cpp @@ -129,9 +129,9 @@ TEST(shape_of_gpu, dynamic) { topology.add(input_layout("input", in_layout)); topology.add(shape_of("shape_of", input_info("input"), 5, data_types::i32)); - build_options bo; - 
bo.set_option(build_option::allow_new_shape_infer(true)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network network(engine, topology, config); auto inst = network.get_primitive("shape_of"); auto impl = inst->get_impl(); diff --git a/src/plugins/intel_gpu/tests/test_cases/softmax_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/softmax_gpu_test.cpp index c07ce1be3e0..9483d75257a 100644 --- a/src/plugins/intel_gpu/tests/test_cases/softmax_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/softmax_gpu_test.cpp @@ -920,8 +920,8 @@ public: set_values(input, params.input); - build_options bo; - bo.set_option(build_option::optimize_data(false)); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(false)); network network(engine, topology); network.set_input_data("input", input); @@ -1022,9 +1022,9 @@ TEST(softmax_gpu_bfyx_f32, normalize_f_dynamic) { 0.977054322f //b=1, y=1, x=1 }; - build_options bo; - bo.set_option(build_option::allow_new_shape_infer(true)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network network(engine, topology, config); network.set_input_data("input", input); auto inst = network.get_primitive("softmax"); diff --git a/src/plugins/intel_gpu/tests/test_cases/split_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/split_gpu_test.cpp index 83b918f80d7..0e588336101 100644 --- a/src/plugins/intel_gpu/tests/test_cases/split_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/split_gpu_test.cpp @@ -223,9 +223,9 @@ TEST(split_gpu_f32, basic_split_concat_optimization) { topology.add(concatenation("concat", inputs, 1)); topology.add(reorder("output", input_info("concat"), format::bfyx, data_types::f32)); - build_options opts; - opts.set_option(build_option::optimize_data(true)); - network network(engine, topology, opts); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input); @@ -263,9 +263,9 @@ TEST(split_gpu_i64, basic_split_concat_optimization) { topology.add(concatenation("concat", inputs, 1)); topology.add(reorder("output", input_info("concat"), format::bfyx, data_types::i64)); - build_options opts; - opts.set_option(build_option::optimize_data(true)); - network network(engine, topology, opts); + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); network.set_input_data("input", input); diff --git a/src/plugins/intel_gpu/tests/test_cases/streams_test.cpp b/src/plugins/intel_gpu/tests/test_cases/streams_test.cpp index bb42c726ad4..75a7953b7d4 100644 --- a/src/plugins/intel_gpu/tests/test_cases/streams_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/streams_test.cpp @@ -29,7 +29,7 @@ TEST(gpu_streams, can_create_networks_for_stream) { topology topology( input_layout("input", input->get_layout()), activation("relu", input_info("input"), activation_func::relu_negative_slope, activation_additional_params{ 0.5f, 0.f }, padding{ { 0, 0, 0, 0 }, 0 })); - network network(engine, topology, build_options()); + network network(engine, topology, ExecutionConfig{}); network.set_input_data("input", input); auto outputs = network.execute(); @@ -72,7 +72,7 @@ TEST(gpu_streams, check_networks_can_use_the_same_weights) { convolution("conv", input_info("input"), { "weights" }, 
{ 2, 1 })); set_values(weights, { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f }); - auto prog = program::build_program(engine, topology, build_options()); + auto prog = program::build_program(engine, topology, ExecutionConfig{}); network network0(prog, 0); network network1(prog, 1); @@ -136,7 +136,7 @@ TEST(gpu_streams, check_networks_use_unique_mutable_data_per_stream) { convolution("conv", input_info("input"), { "weights" }, { 2, 1 })); set_values(weights, { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f }); - auto prog = program::build_program(engine, topology, build_options()); + auto prog = program::build_program(engine, topology, ExecutionConfig{}); network network0(prog, 0); network network1(prog, 1); diff --git a/src/plugins/intel_gpu/tests/test_cases/strided_slice_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/strided_slice_gpu_test.cpp index f5d41f7bb60..534c86729e2 100644 --- a/src/plugins/intel_gpu/tests/test_cases/strided_slice_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/strided_slice_gpu_test.cpp @@ -1497,9 +1497,9 @@ TEST(strided_slice_gpu, test_2x2x2x1x1_2_negative_all_dynamic) { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {})); - build_options bo; - bo.set_option(build_option::allow_new_shape_infer(true)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network network(engine, topology, config); network.set_input_data("input", input); @@ -1541,9 +1541,9 @@ TEST(strided_slice_gpu, test_2x2x2x1x1_2_negative_all_dynamic_begin) { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {})); - build_options bo; - bo.set_option(build_option::allow_new_shape_infer(true)); - network network(engine, topology, bo); + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network network(engine, topology, config); network.set_input_data("input2", begin); diff --git a/src/plugins/intel_gpu/tests/test_cases/test_device_mem_usage_estimation.cpp b/src/plugins/intel_gpu/tests/test_cases/test_device_mem_usage_estimation.cpp index 19ebc1c0454..1a9f3c13efe 100644 --- a/src/plugins/intel_gpu/tests/test_cases/test_device_mem_usage_estimation.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/test_device_mem_usage_estimation.cpp @@ -12,7 +12,8 @@ using namespace cldnn; using namespace tests; TEST(test_device_mem_usage_estimation, basic) { - std::shared_ptr engine1 = create_test_engine(cldnn::queue_types::out_of_order); + ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + std::shared_ptr engine1 = create_test_engine(); auto input1 = engine1->allocate_memory({ data_types::f16, format::bfyx,{ 2, 2, 256, 256} }); auto input2 = engine1->allocate_memory({ data_types::f16, format::bfyx,{ 2, 2, 256, 256} }); @@ -25,14 +26,14 @@ TEST(test_device_mem_usage_estimation, basic) { reorder("output", input_info("eltw"), format::bfyx, data_types::f32) ); - auto prog = program::build_program(*engine1, topology, build_options()); + auto prog = program::build_program(*engine1, topology, cfg); std::pair estimated_mem_usage = prog->get_estimated_device_mem_usage(); - std::shared_ptr engine2 = create_test_engine(cldnn::queue_types::out_of_order); + std::shared_ptr engine2 = create_test_engine(); 
auto input3 = engine2->allocate_memory({ data_types::f16, format::bfyx,{ 2, 2, 256, 256} }); auto input4 = engine2->allocate_memory({ data_types::f16, format::bfyx,{ 2, 2, 256, 256} }); - network network(*engine2, topology); + network network(*engine2, topology, cfg); network.set_input_data("input1", input3); network.set_input_data("input2", input4); ASSERT_EQ(estimated_mem_usage.first + estimated_mem_usage.second, engine2->get_used_device_memory(allocation_type::usm_device)); diff --git a/src/plugins/intel_gpu/tests/test_cases/trim_to_outputs_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/trim_to_outputs_gpu_test.cpp index 279558168a4..2ede2d2738e 100644 --- a/src/plugins/intel_gpu/tests/test_cases/trim_to_outputs_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/trim_to_outputs_gpu_test.cpp @@ -24,9 +24,9 @@ using namespace ::tests; */ TEST(trim_to_outputs, one_node_to_eliminate_case1) { auto& engine = get_test_engine(); - build_options build_opt; - build_opt.set_option(cldnn::build_option::outputs({ "conv1" })); - build_opt.set_option(build_option::optimize_data(false)); // to avoid adding reorders + ExecutionConfig config; + config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv1" })); + config.set_property(ov::intel_gpu::optimize_data(false)); // to avoid adding reorders auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 1, 1 } }); auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); @@ -45,7 +45,7 @@ TEST(trim_to_outputs, one_node_to_eliminate_case1) { topology.add(cldnn::convolution("conv1", { input_info("input") }, { "weights" }, { "bias" })); topology.add(cldnn::convolution("conv2", { input_info("input") }, { "weights" }, { "bias" })); - network network(engine, topology, build_opt); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -73,9 +73,9 @@ Network structure: input -> conv1 (output) */ TEST(trim_to_outputs, one_node_to_eliminate_case2) { auto& engine = get_test_engine(); - build_options build_opt; - build_opt.set_option(cldnn::build_option::outputs({ "conv1" })); - build_opt.set_option(build_option::optimize_data(false)); // to avoid adding reorders + ExecutionConfig config; + config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv1" })); + config.set_property(ov::intel_gpu::optimize_data(false)); // to avoid adding reorders auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 1, 1 } }); auto weights1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); @@ -100,7 +100,7 @@ TEST(trim_to_outputs, one_node_to_eliminate_case2) { topology.add(data("bias2", bias2)); topology.add(cldnn::convolution("conv2", { input_info("input") }, { "weights2" }, { "bias2" })); - network network(engine, topology, build_opt); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); @@ -130,9 +130,9 @@ Convolutions conv2, conv3 should be optimized out along with weights23 shered by */ TEST(trim_to_outputs, two_nodes_to_eliminate_case1) { auto& engine = get_test_engine(); - build_options build_opt; - build_opt.set_option(cldnn::build_option::outputs({ "conv4" })); - build_opt.set_option(build_option::optimize_data(false)); // to avoid adding reorders + ExecutionConfig config; + config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv4" })); + config.set_property(ov::intel_gpu::optimize_data(false)); // to 
avoid adding reorders auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 1, 1 } }); auto weights1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); @@ -159,7 +159,7 @@ TEST(trim_to_outputs, two_nodes_to_eliminate_case1) { topology.add(data("weights4", weights4)); topology.add(cldnn::convolution("conv4", { input_info("conv1") }, { "weights4" }, { "bias" })); - network network(engine, topology, build_opt); + network network(engine, topology, config); network.set_input_data("input", input); auto outputs = network.execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/variable.cpp b/src/plugins/intel_gpu/tests/test_cases/variable.cpp index 880f4856aa0..9d0145a2235 100644 --- a/src/plugins/intel_gpu/tests/test_cases/variable.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/variable.cpp @@ -35,7 +35,7 @@ struct variable_test : public ::testing::TestWithParam> { topology.add(eltwise{"sum", { input_info("input"), input_info("read_value") }, eltwise_mode::sum, {}, variable_layout.data_type}); topology.add(assign{"assign", { input_info("sum") }, "v0", variable_layout}); - network network(engine, topology, build_options{}, false); + network network(engine, topology, ExecutionConfig{}, false); network.assign_variables_memories({ { "v0", std::make_shared(engine.allocate_memory(variable_layout)) } }); network.set_input_data("input", input_data); @@ -123,7 +123,7 @@ TEST(variable_test_common, exception_on_wrong_layout) { topology.add(input_layout("wrong_input", wrong_input_data->get_layout())); topology.add(assign{"assign", { input_info("wrong_input") }, "v0", wrong_layout}); - network network(engine, topology, build_options{}, false); + network network(engine, topology, ExecutionConfig{}, false); network.assign_variables_memories({ { "v0", std::make_shared(engine.allocate_memory(variable_layout)) } }); network.set_input_data("input", input_data); network.set_input_data("wrong_input", wrong_input_data); @@ -174,7 +174,7 @@ TEST(variable_test_common, variables_are_preserved_across_inferences) { topology.add(data("dummy2", dummy2)); topology.add(read_value{"read_result", { input_info("dummy2") }, "v_result", variable_layout}); - network network{engine, topology, build_options{}, true}; + network network{engine, topology, ExecutionConfig{}, true}; network.assign_variables_memories({ { "v1", std::make_shared(engine.allocate_memory(variable_layout)) }, { "v2", std::make_shared(engine.allocate_memory(variable_layout)) }, diff --git a/src/plugins/intel_gpu/tests/test_utils/network_test.h b/src/plugins/intel_gpu/tests/test_utils/network_test.h index 15514d96d74..228fc77dfca 100644 --- a/src/plugins/intel_gpu/tests/test_utils/network_test.h +++ b/src/plugins/intel_gpu/tests/test_utils/network_test.h @@ -312,7 +312,7 @@ public: std::shared_ptr> input, std::shared_ptr> weights, std::shared_ptr> bias, - cldnn::implementation_desc force = cldnn::implementation_desc{ cldnn::format::any, "" }) { + ov::intel_gpu::ImplementationDesc force = ov::intel_gpu::ImplementationDesc{ cldnn::format::any, "" }) { topo.add(cldnn::fully_connected(id, input_info(input->id), weights->id, bias->id, cldnn::type_to_data_type::value)); if (force.output_format != cldnn::format::any || force.kernel_name != "") forced_impls[id] = force; @@ -327,7 +327,7 @@ public: std::shared_ptr> input, std::shared_ptr> weights, std::shared_ptr> bias, - cldnn::implementation_desc force = cldnn::implementation_desc{cldnn::format::any, ""}, + ov::intel_gpu::ImplementationDesc force = 
ov::intel_gpu::ImplementationDesc{cldnn::format::any, ""}, size_t input_dim_size = 3) { topo.add(cldnn::fully_connected(id, input_info(input->id), weights->id, bias->id, cldnn::type_to_data_type::value, cldnn::padding(), input_dim_size)); if (force.output_format != cldnn::format::any || force.kernel_name != "") @@ -338,14 +338,14 @@ public: return add_node(id, reference_tensor_typed(output_data), {input, weights, bias}); } - cldnn::network::ptr build_network(cldnn::build_options opts, bool is_caching_test=false) { - opts.set_option(cldnn::build_option::force_implementations(forced_impls)); + cldnn::network::ptr build_network(ExecutionConfig config, bool is_caching_test=false) { + config.set_property(ov::intel_gpu::force_implementations(forced_impls)); cldnn::network::ptr net; if (is_caching_test) { membuf mem_buf; { - cldnn::network _network(eng, topo, opts); + cldnn::network _network(eng, topo, config); std::ostream out_mem(&mem_buf); BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem); _network.save(ob); @@ -356,7 +356,7 @@ public: net = std::make_shared(ib, get_test_stream_ptr(), eng); } } else { - net = std::make_shared(eng, topo, opts); + net = std::make_shared(eng, topo, config); } for (auto& in_data : inputs) { @@ -365,8 +365,8 @@ public: return net; } - void run(cldnn::build_options opts, bool is_caching_test=false) { - auto net = build_network(opts, is_caching_test); + void run(ExecutionConfig config, bool is_caching_test=false) { + auto net = build_network(config, is_caching_test); if (!is_caching_test) { std::stringstream network_info; network_info << "Executed kernels: " << std::endl; @@ -409,7 +409,7 @@ protected: cldnn::engine& eng; cldnn::topology topo; - std::map forced_impls; + std::map forced_impls; std::map inputs; std::set outputs; }; diff --git a/src/plugins/intel_gpu/tests/test_utils/test_utils.cpp b/src/plugins/intel_gpu/tests/test_utils/test_utils.cpp index 9f7d4188673..a8ae98e6fa1 100644 --- a/src/plugins/intel_gpu/tests/test_utils/test_utils.cpp +++ b/src/plugins/intel_gpu/tests/test_utils/test_utils.cpp @@ -59,14 +59,14 @@ void generic_test::run_single_test() { } } std::string input_name = "input" + std::to_string(i); - if ((i == 0) && generic_params->network_build_options.get()->enabled()) { + if ((i == 0) && generic_params->network_config.get_property(ov::intel_gpu::optimize_data)) { // Add reorder after the first input in case of optimize data flag since it might change the input layout. input_name = "input0_init"; } // First input is provided to the network as input_layout. // Other inputs are provided as input_layout if optimize data flag is off. Otherwise they are provided as data. - if ((i == 0) || !generic_params->network_build_options.get()->enabled()) { + if ((i == 0) || !generic_params->network_config.get_property(ov::intel_gpu::optimize_data)) { topology.add(input_layout(input_name, input_mems[i]->get_layout())); input_layouts_names.push_back(input_name); } else { @@ -79,7 +79,7 @@ void generic_test::run_single_test() { } } - if (generic_params->network_build_options.get()->enabled()) { + if (generic_params->network_config.get_property(ov::intel_gpu::optimize_data)) { // Add reorder after the first input in case of optimize data flag since it might change the input layout. 
topology.add(reorder("input0", input_info("input0_init"), input_mems[0]->get_layout())); } @@ -91,7 +91,7 @@ void generic_test::run_single_test() { prepare_input_for_test(input_mems); - network network(engine, topology, generic_params->network_build_options); + network network(engine, topology, generic_params->network_config); for (size_t i = 0 ; i < input_layouts_names.size() ; i++) { network.set_input_data(input_layouts_names[i], input_mems[i]); @@ -286,50 +286,25 @@ std::vector> generic_test::generate_generic_test_pa return all_generic_params; } -static cldnn::engine_configuration get_test_engine_config(cldnn::queue_types queue_type) { - const bool enable_profiling = false; - std::string sources_dumps_dir = ""; - priority_mode_types priority_mode = priority_mode_types::disabled; - throttle_mode_types throttle_mode = throttle_mode_types::disabled; - bool use_memory_pool = true; - bool use_unified_shared_memory = true; - return engine_configuration(enable_profiling, queue_type, sources_dumps_dir, priority_mode, throttle_mode, use_memory_pool, use_unified_shared_memory); -} - -std::shared_ptr create_test_engine(cldnn::queue_types queue_type) { - return cldnn::engine::create(engine_types::ocl, runtime_types::ocl, get_test_engine_config(queue_type)); +std::shared_ptr create_test_engine() { + return cldnn::engine::create(engine_types::ocl, runtime_types::ocl); } cldnn::engine& get_test_engine() { static std::shared_ptr test_engine = nullptr; if (!test_engine) { - test_engine = create_test_engine(cldnn::queue_types::out_of_order); + test_engine = create_test_engine(); } return *test_engine; } -cldnn::engine& get_test_engine(const cldnn::engine_configuration& configuration) { - static std::shared_ptr test_engine = nullptr; - if (!test_engine) { - test_engine = cldnn::engine::create(engine_types::ocl, runtime_types::ocl, configuration); - } - return *test_engine; -} - -#ifdef ENABLE_ONEDNN_FOR_GPU -cldnn::engine& get_onednn_test_engine() { - static std::shared_ptr test_engine = nullptr; - if (!test_engine) { - test_engine = create_test_engine(cldnn::queue_types::in_order); - } - return *test_engine; -} -#endif - cldnn::stream_ptr get_test_stream_ptr() { static std::shared_ptr test_stream = nullptr; - if (!test_stream) - test_stream = get_test_engine().create_stream(); + if (!test_stream) { + // Create OOO queue for test purposes. 
If in-order queue is needed in a test, then it should be created there explicitly + ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + test_stream = get_test_engine().create_stream(cfg); + } return test_stream; } diff --git a/src/plugins/intel_gpu/tests/test_utils/test_utils.h b/src/plugins/intel_gpu/tests/test_utils/test_utils.h index e8ed3007bdc..f9a72b89d29 100644 --- a/src/plugins/intel_gpu/tests/test_utils/test_utils.h +++ b/src/plugins/intel_gpu/tests/test_utils/test_utils.h @@ -52,12 +52,8 @@ struct type_to_data_type { namespace tests { -std::shared_ptr create_test_engine(cldnn::queue_types queue_type = cldnn::queue_types::out_of_order); +std::shared_ptr create_test_engine(); cldnn::engine& get_test_engine(); -cldnn::engine& get_test_engine(const cldnn::engine_configuration& configuration); -#ifdef ENABLE_ONEDNN_FOR_GPU -cldnn::engine& get_onednn_test_engine(); -#endif cldnn::stream_ptr get_test_stream_ptr(); cldnn::stream& get_test_stream(); @@ -412,10 +408,10 @@ public: test_params() : fmt(cldnn::format::bfyx) { } - test_params(cldnn::data_types dt, cldnn::format input_format, int32_t batch_size, int32_t feature_size, cldnn::tensor input_size, cldnn::build_options const& options = cldnn::build_options()) : + test_params(cldnn::data_types dt, cldnn::format input_format, int32_t batch_size, int32_t feature_size, cldnn::tensor input_size, ExecutionConfig config = {}) : data_type(dt), fmt(input_format), - network_build_options(options) { + network_config(config) { cldnn::tensor t = cldnn::tensor(batch_size, feature_size, input_size.spatial[0], input_size.spatial[1] ); input_layouts.push_back( cldnn::layout(dt, fmt, t) ); } @@ -426,7 +422,7 @@ public: void * opaque_custom_param = nullptr; - cldnn::build_options network_build_options; + ExecutionConfig network_config; std::string print(); static std::string print_tensor(cldnn::tensor tensor); @@ -467,8 +463,6 @@ public: static std::vector> generate_generic_test_params(std::vector>& all_generic_params); - static void dump_graph(const std::string test_name, cldnn::build_options& bo); - virtual bool is_format_supported(cldnn::format format) = 0; virtual cldnn::tensor get_expected_output_tensor(); @@ -488,7 +482,6 @@ protected: std::shared_ptr layer_params; int max_ulps_diff_allowed; //Max number of ulps allowed between 2 values when comparing the output buffer and the reference buffer. 
bool random_values; // if set memory buffers will be filled with random values - bool dump_graphs; // if set tests will dump graphs to file bool dump_memory; // if set memory buffers will be dumped to file virtual cldnn::memory::ptr generate_reference(const std::vector& inputs) = 0; // Allows the test to override the random input data that the framework generates diff --git a/src/tests/functional/plugin/gpu/concurrency/gpu_concurrency_tests.cpp b/src/tests/functional/plugin/gpu/concurrency/gpu_concurrency_tests.cpp index ed91286008e..da95d111b12 100644 --- a/src/tests/functional/plugin/gpu/concurrency/gpu_concurrency_tests.cpp +++ b/src/tests/functional/plugin/gpu/concurrency/gpu_concurrency_tests.cpp @@ -55,7 +55,7 @@ TEST_P(OVConcurrencyTest, canInferTwoExecNets) { auto fn = fn_ptrs[i]; auto exec_net = ie.compile_model(fn_ptrs[i], CommonTestUtils::DEVICE_GPU, - {ov::num_streams(num_streams), ov::hint::inference_precision(ov::element::f32)}); + ov::num_streams(num_streams), ov::hint::inference_precision(ov::element::f32)); auto input = fn_ptrs[i]->get_parameters().at(0); auto output = fn_ptrs[i]->get_results().at(0); diff --git a/src/tests/functional/plugin/gpu/remote_blob_tests/dx11_remote_ctx_test.cpp b/src/tests/functional/plugin/gpu/remote_blob_tests/dx11_remote_ctx_test.cpp index 03255464534..481bda94793 100644 --- a/src/tests/functional/plugin/gpu/remote_blob_tests/dx11_remote_ctx_test.cpp +++ b/src/tests/functional/plugin/gpu/remote_blob_tests/dx11_remote_ctx_test.cpp @@ -14,6 +14,7 @@ #include #include #include "ngraph_functions/subgraph_builders.hpp" +#include #ifdef _WIN32 #ifdef ENABLE_DX11 @@ -33,6 +34,7 @@ #endif #include +#include #include #include #include @@ -48,8 +50,6 @@ #endif using namespace ::testing; -using namespace InferenceEngine; -using namespace InferenceEngine::gpu; struct DX11RemoteCtx_Test : public CommonTestUtils::TestsCommon { virtual ~DX11RemoteCtx_Test() = default; @@ -151,6 +151,8 @@ TEST_F(DX11RemoteCtx_Test, smoke_make_shared_context) { #if defined(ANDROID) GTEST_SKIP(); #endif + using namespace InferenceEngine; + using namespace InferenceEngine::gpu; auto ie = InferenceEngine::Core(); CComPtr device_ptr; @@ -180,6 +182,8 @@ TEST_F(DX11CachedTexture_Test, smoke_make_shared_nv12_blob_cached) { #if defined(ANDROID) GTEST_SKIP(); #endif + using namespace InferenceEngine; + using namespace InferenceEngine::gpu; auto ie = InferenceEngine::Core(); auto remote_context = make_shared_context(ie, CommonTestUtils::DEVICE_GPU, device_ptr); @@ -200,7 +204,8 @@ TEST_F(DX11CachedTexture_Test, _make_shared_nv12_blob_cached_inference) { #if defined(ANDROID) GTEST_SKIP(); #endif - + using namespace InferenceEngine; + using namespace InferenceEngine::gpu; // inference using remote blob with batch auto fn_ptr_remote = ngraph::builder::subgraph::makeConvPoolRelu({1, 3, texture_description.Height, texture_description.Width}); auto ie = InferenceEngine::Core(); @@ -237,6 +242,56 @@ TEST_F(DX11CachedTexture_Test, _make_shared_nv12_blob_cached_inference) { } } +TEST_F(DX11CachedTexture_Test, smoke_make_shared_nv12_tensor_cached) { +#if defined(ANDROID) + GTEST_SKIP(); +#endif + ov::Core core; + ov::intel_gpu::ocl::D3DContext context(core, device_ptr); + const size_t total_run_number = 4; + for (size_t i = 0; i < total_run_number; i++) { + for (const auto& t : dx11_textures) { + ASSERT_NO_THROW(auto tensor = context.create_tensor_nv12(texture_description.Height, texture_description.Width, t)); + } + } +} + +TEST_F(DX11CachedTexture_Test, 
_make_shared_nv12_tensor_cached_inference) { +#if defined(ANDROID) + GTEST_SKIP(); +#endif + // inference using remote blob with batch + auto fn_ptr_remote = ngraph::builder::subgraph::makeConvPoolRelu({1, 3, texture_description.Height, texture_description.Width}); + ov::Core core; + ov::intel_gpu::ocl::D3DContext context(core, device_ptr); + + using namespace ov::preprocess; + auto p = PrePostProcessor(fn_ptr_remote); + p.input().tensor().set_element_type(ov::element::u8) + .set_color_format(ov::preprocess::ColorFormat::NV12_TWO_PLANES, {"y", "uv"}) + .set_memory_type(GPU_CONFIG_KEY(SURFACE)); + p.input().preprocess().convert_color(ov::preprocess::ColorFormat::BGR); + p.input().model().set_layout("NCHW"); + auto model = p.build(); + + auto param_input_y = model->get_parameters().at(0); + auto param_input_uv = model->get_parameters().at(1); + + const size_t total_run_number = 4; + + auto compiled_model = core.compile_model(model, context); + auto request = compiled_model.create_infer_request(); + + const size_t iteration_count = 10; + for (size_t i = 0; i < iteration_count; i++) { + auto tensor = context.create_tensor_nv12(texture_description.Height, texture_description.Width, dx11_textures[0]); + request.set_tensor(param_input_y, tensor.first); + request.set_tensor(param_input_uv, tensor.second); + + ASSERT_NO_THROW(request.infer()); + auto output_tensor = request.get_tensor(model->get_results().at(0)); + } +} #endif // ENABLE_DX11 #endif // WIN32 diff --git a/src/tests/functional/plugin/gpu/remote_blob_tests/gpu_remote_tensor_tests.cpp b/src/tests/functional/plugin/gpu/remote_blob_tests/gpu_remote_tensor_tests.cpp index 3c43f23791a..804e9666ed7 100644 --- a/src/tests/functional/plugin/gpu/remote_blob_tests/gpu_remote_tensor_tests.cpp +++ b/src/tests/functional/plugin/gpu/remote_blob_tests/gpu_remote_tensor_tests.cpp @@ -1541,6 +1541,52 @@ TEST_P(OVRemoteTensorBatched_Test, NV12toBGR_buffer) { const std::vector num_batches{ 1, 2, 4 }; INSTANTIATE_TEST_SUITE_P(smoke_RemoteTensor, OVRemoteTensorBatched_Test, ::testing::ValuesIn(num_batches), OVRemoteTensorBatched_Test::getTestCaseName); +static void check_contexts_are_same(const ov::RemoteContext& c1, const ov::RemoteContext& c2) { + ASSERT_EQ(c1.get_device_name(), c2.get_device_name()); + + // If we support other context type this check must be replaced + ASSERT_TRUE(c1.is()); + ASSERT_TRUE(c2.is()); + + auto c1_casted = c1.as(); + auto c2_casted = c2.as(); + + ASSERT_EQ(c1_casted.get(), c2_casted.get()); +} + +TEST(OVRemoteContextGPU, smoke_CustomContextDeviceNames) { +#if defined(ANDROID) + GTEST_SKIP(); +#endif + auto core = ov::Core(); + std::vector gpuDevices; + std::vector availableDevices = core.get_available_devices(); + + std::for_each(availableDevices.begin(), availableDevices.end(), [&](const std::string& device){ + if (device.find(CommonTestUtils::DEVICE_GPU) != std::string::npos) + gpuDevices.push_back(device); + }); + + for (size_t i = 0; i < gpuDevices.size(); i++) { + auto device_name = "GPU." 
+ std::to_string(i); + auto ctx = core.get_default_context(device_name).as<ov::intel_gpu::ocl::ClContext>(); + cl::Context original_ctx_handle = ctx; + std::vector<cl::Device> devices = original_ctx_handle.getInfo<CL_CONTEXT_DEVICES>(); + cl::Context new_ctx_handle(devices); + ASSERT_NE(new_ctx_handle.get(), original_ctx_handle.get()); + auto remote_context = ov::intel_gpu::ocl::ClContext(core, new_ctx_handle.get(), 0); + ASSERT_EQ(remote_context.get_device_name(), device_name); + + // Check that ctx_device_id doesn't impact device name reported by context + cl::Context new_ctx_handle_md({devices.front(), devices.front()}); + ASSERT_NE(original_ctx_handle.get(), new_ctx_handle_md.get()); + auto remote_context0 = ov::intel_gpu::ocl::ClContext(core, new_ctx_handle_md.get(), 0); + auto remote_context1 = ov::intel_gpu::ocl::ClContext(core, new_ctx_handle_md.get(), 1); + ASSERT_EQ(remote_context0.get_device_name(), device_name); + ASSERT_EQ(remote_context1.get_device_name(), device_name); + } +} + TEST(OVRemoteContextGPU, smoke_RemoteContextPerDevice) { #if defined(ANDROID) GTEST_SKIP(); #endif @@ -1597,8 +1643,7 @@ TEST(OVRemoteContextGPU, smoke_RemoteContextCaching) { auto defaultContextFirst = core.get_default_context(gpuDeviceFirst).as<ov::intel_gpu::ocl::ClContext>(); // Check devices names ASSERT_EQ(defaultContextFirst.get_device_name(), gpuDeviceFirst); - // Check underlying OpenCL context handles - ASSERT_EQ(compiledModelFirstContext.get(), defaultContextFirst.get()); + check_contexts_are_same(compiledModelFirstContext, defaultContextFirst); auto defaultContextSecond = core.get_default_context(gpuDeviceSecond).as<ov::intel_gpu::ocl::ClContext>(); // Check devices names @@ -1609,3 +1654,49 @@ TEST(OVRemoteContextGPU, smoke_RemoteContextCaching) { // Expect different contexts for different devices ASSERT_NE(compiledModelFirstContext.get(), compiledModelSecondContext.get()); } + +TEST(OVRemoteContextGPU, smoke_RemoteContextSingleDevice) { +#if defined(ANDROID) + GTEST_SKIP(); +#endif + auto core = ov::Core(); + + auto default_ctx = core.get_default_context(CommonTestUtils::DEVICE_GPU).as<ov::intel_gpu::ocl::ClContext>(); + + // Same context returned for multiple calls + check_contexts_are_same(default_ctx, core.get_default_context(CommonTestUtils::DEVICE_GPU)); + + // Set some properties which could impact engine config and check context again + core.set_property(CommonTestUtils::DEVICE_GPU, ov::streams::num(2)); + core.set_property(CommonTestUtils::DEVICE_GPU, ov::intel_gpu::hint::queue_throttle(ov::intel_gpu::hint::ThrottleLevel::LOW)); + core.set_property(CommonTestUtils::DEVICE_GPU, ov::enable_profiling(true)); + check_contexts_are_same(default_ctx, core.get_default_context(CommonTestUtils::DEVICE_GPU)); + + // Ensure compiled model uses default context too + auto model = ngraph::builder::subgraph::makeConvertTranspose(); + auto compiled_model = core.compile_model(model, CommonTestUtils::DEVICE_GPU); + check_contexts_are_same(default_ctx, compiled_model.get_context()); + ASSERT_EQ(2, compiled_model.get_property(ov::streams::num)); + + auto ocl_instance = std::make_shared<OpenCL>(); + cl::Context default_ctx_handle = default_ctx; + auto default_devices = default_ctx_handle.getInfo<CL_CONTEXT_DEVICES>(); + ASSERT_EQ(default_devices.size(), 1); + cl::Device default_device_handle(default_devices[0]); + // OCL instance looks for intel GPUs, so skip this part if CommonTestUtils::DEVICE_GPU points to GPU from other vendor + if (default_device_handle.getInfo<CL_DEVICE_VENDOR_ID>() == 0x8086) { + ov::intel_gpu::ocl::ClContext custom_ctx(core, ocl_instance->_queue.get()); + auto compiled_model_custom_ctx = core.compile_model(model, custom_ctx, ov::streams::num(1)); + auto model_ctx =
compiled_model_custom_ctx.get_context().as<ov::intel_gpu::ocl::ClContext>(); + + // Check that compiled model uses custom context + check_contexts_are_same(custom_ctx, model_ctx); + ASSERT_EQ(1, compiled_model_custom_ctx.get_property(ov::streams::num)); + + // Check that handle differs in default context and compiled model created with custom ctx + ASSERT_NE(default_ctx.get(), model_ctx.get()); + + // Check that default ctx is untouched + check_contexts_are_same(default_ctx, core.get_default_context(CommonTestUtils::DEVICE_GPU)); + } +} diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/core_integration.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/core_integration.cpp index fe5ad0951b5..320dafe03a5 100644 --- a/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/core_integration.cpp +++ b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/core_integration.cpp @@ -362,17 +362,19 @@ TEST_P(OVClassGetPropertyTest_GPU, GetAndSetModelPriorityNoThrow) { ov::hint::Priority defaultValue{}; ASSERT_NO_THROW(defaultValue = ie.get_property(target_device, ov::hint::model_priority)); - std::cout << "Default PERF_COUNT: " << defaultValue << std::endl; + std::cout << "Default model_priority: " << defaultValue << std::endl; ie.set_property(target_device, ov::hint::model_priority(ov::hint::Priority::HIGH)); ASSERT_EQ(ov::hint::Priority::HIGH, ie.get_property(target_device, ov::hint::model_priority)); - ASSERT_EQ(ov::hint::Priority::HIGH, ie.get_property(target_device, ov::intel_gpu::hint::queue_priority)); + ASSERT_EQ(ov::hint::Priority::MEDIUM, ie.get_property(target_device, ov::intel_gpu::hint::queue_priority)); ie.set_property(target_device, ov::hint::model_priority(ov::hint::Priority::LOW)); ASSERT_EQ(ov::hint::Priority::LOW, ie.get_property(target_device, ov::hint::model_priority)); - ASSERT_EQ(ov::hint::Priority::LOW, ie.get_property(target_device, ov::intel_gpu::hint::queue_priority)); + ASSERT_EQ(ov::hint::Priority::MEDIUM, ie.get_property(target_device, ov::intel_gpu::hint::queue_priority)); ie.set_property(target_device, ov::hint::model_priority(ov::hint::Priority::MEDIUM)); ASSERT_EQ(ov::hint::Priority::MEDIUM, ie.get_property(target_device, ov::hint::model_priority)); ASSERT_EQ(ov::hint::Priority::MEDIUM, ie.get_property(target_device, ov::intel_gpu::hint::queue_priority)); + ie.set_property(target_device, ov::intel_gpu::hint::queue_priority(ov::hint::Priority::HIGH)); + ASSERT_EQ(ov::hint::Priority::HIGH, ie.get_property(target_device, ov::intel_gpu::hint::queue_priority)); OV_ASSERT_PROPERTY_SUPPORTED(ov::hint::model_priority); }
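
The test changes above all follow one migration pattern: a cldnn::build_options object holding individual build_option values becomes a single ExecutionConfig whose options are set through set_property(), and that config is passed to network/program construction. The sketch below restates the pattern in one place; the property names, QueueTypes and the network constructor signature come from the hunks in this patch, while the helper function name and the include list are illustrative assumptions rather than part of the change. A default-constructed ExecutionConfig{} stands in for the old build_options() wherever a test does not set any option.

// Minimal sketch of the build_options -> ExecutionConfig migration used by the updated tests.
#include <memory>
#include "intel_gpu/graph/network.hpp"
#include "intel_gpu/graph/topology.hpp"
#include "intel_gpu/runtime/engine.hpp"
#include "intel_gpu/runtime/execution_config.hpp"

using namespace cldnn;

network::ptr build_test_network(engine& eng, topology& topo) {
    // Old API (removed): build_options bo; bo.set_option(build_option::optimize_data(true));
    ExecutionConfig config;                                            // replaces cldnn::build_options
    config.set_property(ov::intel_gpu::optimize_data(true));          // was build_option::optimize_data(true)
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));  // was build_option::allow_new_shape_infer(true)
    // Queue type now travels through the config as well; the shared test stream defaults to
    // out-of-order, so tests that need an in-order queue request it explicitly.
    config.set_property(ov::intel_gpu::queue_type(QueueTypes::out_of_order));
    return std::make_shared<network>(eng, topo, config);              // same pattern as build_network() in network_test.h
}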