diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/device_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/device_config.hpp
index 227390ee2ef..364f1159238 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/plugin/device_config.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/device_config.hpp
@@ -32,7 +32,7 @@ struct Config {
         max_dynamic_batch(1),
         customLayers({}),
         kernels_cache_dir(""),
-        inference_precision(ov::element::undefined),
+        inference_precision(ov::element::f16),
         task_exec_config({"GPU plugin internal task executor",                                 // name
                           std::max(1, static_cast<int>(std::thread::hardware_concurrency())), // # of streams
                           1,                                                                   // # of threads per streams
diff --git a/src/tests/functional/plugin/gpu/concurrency/gpu_concurrency_tests.cpp b/src/tests/functional/plugin/gpu/concurrency/gpu_concurrency_tests.cpp
index b8156314737..ed91286008e 100644
--- a/src/tests/functional/plugin/gpu/concurrency/gpu_concurrency_tests.cpp
+++ b/src/tests/functional/plugin/gpu/concurrency/gpu_concurrency_tests.cpp
@@ -55,7 +55,7 @@ TEST_P(OVConcurrencyTest, canInferTwoExecNets) {
         auto fn = fn_ptrs[i];
 
         auto exec_net = ie.compile_model(fn_ptrs[i], CommonTestUtils::DEVICE_GPU,
-                                         {{ov::ie::PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS, std::to_string(num_streams)}});
+                                         {ov::num_streams(num_streams), ov::hint::inference_precision(ov::element::f32)});
 
         auto input = fn_ptrs[i]->get_parameters().at(0);
         auto output = fn_ptrs[i]->get_results().at(0);
@@ -115,7 +115,7 @@ TEST(canSwapTensorsBetweenInferRequests, inputs) {
     auto fn = ngraph::builder::subgraph::makeSplitMultiConvConcat();
 
     auto ie = ov::Core();
-    auto compiled_model = ie.compile_model(fn, CommonTestUtils::DEVICE_GPU);
+    auto compiled_model = ie.compile_model(fn, CommonTestUtils::DEVICE_GPU, ov::hint::inference_precision(ov::element::f32));
 
     const int infer_requests_num = 2;
     ov::InferRequest infer_request1 = compiled_model.create_infer_request();
@@ -193,7 +193,7 @@ TEST(smoke_InferRequestDeviceMemoryAllocation, usmHostIsNotChanged) {
    auto fn = ngraph::builder::subgraph::makeDetectionOutput(ngraph::element::Type_t::f32);
 
    auto ie = ov::Core();
-   auto compiled_model = ie.compile_model(fn, CommonTestUtils::DEVICE_GPU);
+   auto compiled_model = ie.compile_model(fn, CommonTestUtils::DEVICE_GPU, ov::hint::inference_precision(ov::element::f32));
 
    ov::InferRequest infer_request1 = compiled_model.create_infer_request();
    ov::InferRequest infer_request2 = compiled_model.create_infer_request();
@@ -232,7 +232,7 @@ TEST(smoke_InferRequestDeviceMemoryAllocation, canSetSystemHostTensor) {
    auto fn = ngraph::builder::subgraph::makeDetectionOutput(ngraph::element::Type_t::f32);
 
    auto ie = ov::Core();
-   auto compiled_model = ie.compile_model(fn, CommonTestUtils::DEVICE_GPU);
+   auto compiled_model = ie.compile_model(fn, CommonTestUtils::DEVICE_GPU, ov::hint::inference_precision(ov::element::f32));
 
    ov::InferRequest infer_request1 = compiled_model.create_infer_request();
    ov::InferRequest infer_request2 = compiled_model.create_infer_request();
@@ -258,7 +258,7 @@ TEST(canSwapTensorsBetweenInferRequests, outputs) {
     auto fn = ngraph::builder::subgraph::makeSplitMultiConvConcat();
 
     auto ie = ov::Core();
-    auto compiled_model = ie.compile_model(fn, CommonTestUtils::DEVICE_GPU);
+    auto compiled_model = ie.compile_model(fn, CommonTestUtils::DEVICE_GPU, ov::hint::inference_precision(ov::element::f32));
 
     const int infer_requests_num = 2;
     ov::InferRequest infer_request1 = compiled_model.create_infer_request();
diff --git a/src/tests/functional/plugin/gpu/remote_blob_tests/cldnn_remote_blob_tests.cpp b/src/tests/functional/plugin/gpu/remote_blob_tests/cldnn_remote_blob_tests.cpp
index 9cb8db61e15..041bb1aea2c 100644
--- a/src/tests/functional/plugin/gpu/remote_blob_tests/cldnn_remote_blob_tests.cpp
+++ b/src/tests/functional/plugin/gpu/remote_blob_tests/cldnn_remote_blob_tests.cpp
@@ -40,6 +40,7 @@ public:
                       {CONFIG_KEY(AUTO_BATCH_TIMEOUT) , "0"},
             };
         }
+        config.insert({ov::hint::inference_precision.name(), "f32"});
         fn_ptr = ov::test::behavior::getDefaultNGraphFunctionForTheDevice(with_auto_batching ? CommonTestUtils::DEVICE_BATCH : deviceName);
     }
     static std::string getTestCaseName(const testing::TestParamInfo& obj) {
@@ -229,7 +230,7 @@ TEST_P(RemoteBlob_Test, smoke_canInferOnUserContext) {
     auto blob = FuncTestUtils::createAndFillBlob(net.getInputsInfo().begin()->second->getTensorDesc());
 
     auto ie = PluginCache::get().ie();
-    auto exec_net_regular = ie->LoadNetwork(net, deviceName);
+    auto exec_net_regular = ie->LoadNetwork(net, deviceName, {{ov::hint::inference_precision.name(), "f32"}});
 
     // regular inference
     auto inf_req_regular = exec_net_regular.CreateInferRequest();
@@ -276,7 +277,7 @@ TEST_P(RemoteBlob_Test, smoke_canInferOnUserQueue_out_of_order) {
     auto blob = FuncTestUtils::createAndFillBlob(net.getInputsInfo().begin()->second->getTensorDesc());
 
     auto ie = PluginCache::get().ie();
-    auto exec_net_regular = ie->LoadNetwork(net, deviceName);
+    auto exec_net_regular = ie->LoadNetwork(net, deviceName, {{ov::hint::inference_precision.name(), "f32"}});
 
     // regular inference
     auto inf_req_regular = exec_net_regular.CreateInferRequest();
@@ -304,7 +305,7 @@ TEST_P(RemoteBlob_Test, smoke_canInferOnUserQueue_out_of_order) {
     // In this scenario we create shared OCL queue and run simple pre-process action and post-process action (buffer copies in both cases)
     // without calling thread blocks
     auto remote_context = make_shared_context(*ie, deviceName, ocl_instance->_queue.get());
-    auto exec_net_shared = ie->LoadNetwork(net, remote_context); // no auto-batching support, so no config is passed
+    auto exec_net_shared = ie->LoadNetwork(net, remote_context, {{ov::hint::inference_precision.name(), "f32"}});
     auto inf_req_shared = exec_net_shared.CreateInferRequest();
 
     // Allocate shared buffers for input and output data which will be set to infer request
@@ -374,7 +375,7 @@ TEST_P(RemoteBlob_Test, smoke_canInferOnUserQueue_in_order) {
     auto blob = FuncTestUtils::createAndFillBlob(net.getInputsInfo().begin()->second->getTensorDesc());
 
     auto ie = PluginCache::get().ie();
-    auto exec_net_regular = ie->LoadNetwork(net, deviceName);
+    auto exec_net_regular = ie->LoadNetwork(net, deviceName, {{ov::hint::inference_precision.name(), "f32"}});
 
     // regular inference
     auto inf_req_regular = exec_net_regular.CreateInferRequest();
@@ -403,7 +404,7 @@ TEST_P(RemoteBlob_Test, smoke_canInferOnUserQueue_in_order) {
     // In this scenario we create shared OCL queue and run simple pre-process action and post-process action (buffer copies in both cases)
     // without calling thread blocks
     auto remote_context = make_shared_context(*ie, deviceName, ocl_instance->_queue.get());
-    auto exec_net_shared = ie->LoadNetwork(net, remote_context); // no auto-batching support, so no config is passed
+    auto exec_net_shared = ie->LoadNetwork(net, remote_context, {{ov::hint::inference_precision.name(), "f32"}});
     auto inf_req_shared = exec_net_shared.CreateInferRequest();
 
     // Allocate shared buffers for input and output data which will be set to infer request
@@ -468,7 +469,7 @@ TEST_P(RemoteBlob_Test, smoke_canInferOnUserQueue_infer_call_many_times) {
     auto blob = FuncTestUtils::createAndFillBlob(net.getInputsInfo().begin()->second->getTensorDesc());
 
     auto ie = PluginCache::get().ie();
-    auto exec_net_regular = ie->LoadNetwork(net, deviceName);
+    auto exec_net_regular = ie->LoadNetwork(net, deviceName, {{ov::hint::inference_precision.name(), "f32"}});
 
     // regular inference
     auto inf_req_regular = exec_net_regular.CreateInferRequest();
@@ -497,7 +498,7 @@ TEST_P(RemoteBlob_Test, smoke_canInferOnUserQueue_infer_call_many_times) {
     // In this scenario we create shared OCL queue and run simple pre-process action and post-process action (buffer copies in both cases)
     // without calling thread blocks
     auto remote_context = make_shared_context(*ie, deviceName, ocl_instance->_queue.get());
-    auto exec_net_shared = ie->LoadNetwork(net, remote_context); // no auto-batching support, so no config is passed
+    auto exec_net_shared = ie->LoadNetwork(net, remote_context, {{ov::hint::inference_precision.name(), "f32"}});
     auto inf_req_shared = exec_net_shared.CreateInferRequest();
 
     // Allocate shared buffers for input and output data which will be set to infer request
@@ -600,7 +601,7 @@ TEST_P(BatchedBlob_Test, canInputNV12) {
 
     /* XXX: is it correct to set KEY_CLDNN_NV12_TWO_INPUTS in case of remote blob? */
     auto exec_net_b = ie.LoadNetwork(net_remote, CommonTestUtils::DEVICE_GPU,
-                { { GPUConfigParams::KEY_GPU_NV12_TWO_INPUTS, PluginConfigParams::YES} });
+                { { GPUConfigParams::KEY_GPU_NV12_TWO_INPUTS, PluginConfigParams::YES}, {ov::hint::inference_precision.name(), "f32"} });
     auto inf_req_remote = exec_net_b.CreateInferRequest();
     auto cldnn_context = exec_net_b.GetContext();
     cl_context ctx = std::dynamic_pointer_cast(cldnn_context)->get();
@@ -669,7 +670,7 @@ TEST_P(BatchedBlob_Test, canInputNV12) {
     net_local.getInputsInfo().begin()->second->setPrecision(Precision::U8);
     net_local.getInputsInfo().begin()->second->getPreProcess().setColorFormat(ColorFormat::NV12);
 
-    auto exec_net_b1 = ie.LoadNetwork(net_local, CommonTestUtils::DEVICE_GPU);
+    auto exec_net_b1 = ie.LoadNetwork(net_local, CommonTestUtils::DEVICE_GPU, {{ov::hint::inference_precision.name(), "f32"}});
 
     auto inf_req_local = exec_net_b1.CreateInferRequest();
 
@@ -740,7 +741,8 @@ TEST_P(TwoNets_Test, canInferTwoExecNets) {
         net.getInputsInfo().begin()->second->setPrecision(Precision::FP32);
 
         auto exec_net = ie.LoadNetwork(net, CommonTestUtils::DEVICE_GPU,
-                                       {{PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS, std::to_string(num_streams)}});
+                                       {{PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS, std::to_string(num_streams)},
+                                        {ov::hint::inference_precision.name(), "f32"}});
 
         for (int j = 0; j < num_streams * num_requests; j++) {
             outputs.push_back(net.getOutputsInfo().begin()->first);
diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/core_integration.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/core_integration.cpp
index d26778d07d5..fe5ad0951b5 100644
--- a/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/core_integration.cpp
+++ b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/core_integration.cpp
@@ -344,12 +344,12 @@ TEST_P(OVClassGetPropertyTest_GPU, GetAndSetEnableProfilingNoThrow) {
 
 TEST_P(OVClassGetPropertyTest_GPU, GetAndSetInferencePrecisionNoThrow) {
     ov::Core ie;
     auto value = ov::element::undefined;
-    const auto expected_default_precision = ov::element::undefined;
+    const auto expected_default_precision = ov::element::f16;
 
     OV_ASSERT_NO_THROW(value = ie.get_property(target_device, ov::hint::inference_precision));
     ASSERT_EQ(expected_default_precision, value);
 
-    const auto forced_precision = ov::element::f16;
+    const auto forced_precision = ov::element::f32;
     OV_ASSERT_NO_THROW(ie.set_property(target_device, ov::hint::inference_precision(forced_precision)));
     OV_ASSERT_NO_THROW(value = ie.get_property(target_device, ov::hint::inference_precision));
diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/core_config.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/core_config.cpp
index 5e7e7675443..138101533f4 100644
--- a/src/tests/functional/plugin/gpu/shared_tests_instances/core_config.cpp
+++ b/src/tests/functional/plugin/gpu/shared_tests_instances/core_config.cpp
@@ -5,4 +5,16 @@
 #include "functional_test_utils/core_config.hpp"
 
 void CoreConfiguration(LayerTestsUtils::LayerTestsCommon* test) {
+    std::shared_ptr<InferenceEngine::Core> core = PluginCache::get().ie();
+    ov::element::Type hint = ov::element::f32;
+    for (auto& param : test->GetFunction()->get_parameters()) {
+        if (param->get_output_element_type(0) == ov::element::f16) {
+            hint = ov::element::f16;
+            break;
+        }
+    }
+
+    // Set inference_precision hint to run fp32 model in fp32 runtime precision as default plugin execution precision may vary
+    std::map<std::string, std::string> config = {{"INFERENCE_PRECISION_HINT", hint.get_type_name()}};
+    core->SetConfig(config, CommonTestUtils::DEVICE_GPU);
 }
diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp
index 02a4f39b992..0193d47b053 100644
--- a/src/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp
+++ b/src/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp
@@ -125,5 +125,8 @@ std::vector<std::string> disabledTestPatterns() {
         R"(.*smoke_GroupConvolution1D_ExplicitPadding_Disabled.*)",
         R"(.*smoke_GroupConvolutionLayerGPUTest_dynamic1DSymPad_Disabled.*)",
         R"(.*smoke_ConvolutionLayerGPUTest_dynamic1DSymPad.*)",
+
+        // Looks like the test is targeting CPU plugin and doesn't respect that execution graph may vary from plugin to plugin
+        R"(.*ExecGraphSerializationTest.*)",
     };
 }
diff --git a/src/tests/functional/plugin/shared/include/behavior/plugin/auto_batching_tests.hpp b/src/tests/functional/plugin/shared/include/behavior/plugin/auto_batching_tests.hpp
index 270eed3dad0..733d1f9246a 100644
--- a/src/tests/functional/plugin/shared/include/behavior/plugin/auto_batching_tests.hpp
+++ b/src/tests/functional/plugin/shared/include/behavior/plugin/auto_batching_tests.hpp
@@ -70,8 +70,11 @@ protected:
             n.second->setPrecision(Precision::FP32);
         }
         std::map<std::string, std::string> config;
-        if (target_device.find("GPU") != std::string::npos)
+        if (target_device.find("GPU") != std::string::npos) {
             config[CONFIG_KEY(GPU_THROUGHPUT_STREAMS)] = std::to_string(num_streams);
+            config["INFERENCE_PRECISION_HINT"] = "f32";
+        }
+
         if (target_device.find("CPU") != std::string::npos) {
             config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = std::to_string(num_streams);
             config[CONFIG_KEY(ENFORCE_BF16)] = CONFIG_VALUE(NO);
diff --git a/src/tests/functional/plugin/shared/src/execution_graph_tests/normalize_l2_decomposition.cpp b/src/tests/functional/plugin/shared/src/execution_graph_tests/normalize_l2_decomposition.cpp
index 006617645a8..fc56c880a13 100644
--- a/src/tests/functional/plugin/shared/src/execution_graph_tests/normalize_l2_decomposition.cpp
+++ b/src/tests/functional/plugin/shared/src/execution_graph_tests/normalize_l2_decomposition.cpp
@@ -11,6 +11,7 @@
 
 #include "functional_test_utils/skip_tests_config.hpp"
 #include "common_test_utils/ngraph_test_utils.hpp"
+#include "common_test_utils/test_constants.hpp"
 #include "execution_graph_tests/normalize_l2_decomposition.hpp"
 
 namespace ExecutionGraphTests {
@@ -33,7 +34,10 @@ TEST_P(ExecGrapDecomposeNormalizeL2, CheckIfDecomposeAppliedForNonContiguousAxes
     const auto model = std::make_shared<ov::Model>(ov::NodeVector{normalize_l2}, ov::ParameterVector{input});
 
     auto core = ov::Core();
-    const auto compiled_model = core.compile_model(model, device_name);
+    ov::AnyMap config;
+    if (device_name == CommonTestUtils::DEVICE_GPU)
+        config.insert(ov::hint::inference_precision(ov::element::f32));
+    const auto compiled_model = core.compile_model(model, device_name, config);
 
     ASSERT_TRUE(model->get_ops().size() < compiled_model.get_runtime_model()->get_ops().size()); // decomposition applied
 }
@@ -50,7 +54,10 @@ TEST_P(ExecGrapDecomposeNormalizeL2, CheckIfDecomposeAppliedForNormalizeOverAllA
     const auto model = std::make_shared<ov::Model>(ov::NodeVector{normalize_l2}, ov::ParameterVector{input});
 
     auto core = ov::Core();
-    const auto compiled_model = core.compile_model(model, device_name);
+    ov::AnyMap config;
+    if (device_name == CommonTestUtils::DEVICE_GPU)
+        config.insert(ov::hint::inference_precision(ov::element::f32));
+    const auto compiled_model = core.compile_model(model, device_name, config);
 
     ASSERT_TRUE(model->get_ops().size() < compiled_model.get_runtime_model()->get_ops().size()); // decomposition applied
 }
@@ -67,7 +74,10 @@ TEST_P(ExecGrapDecomposeNormalizeL2, CheckIfDecomposeNotAppliedForNotSorted) {
     const auto model = std::make_shared<ov::Model>(ov::NodeVector{normalize_l2}, ov::ParameterVector{input});
 
     auto core = ov::Core();
-    const auto compiled_model = core.compile_model(model, device_name);
+    ov::AnyMap config;
+    if (device_name == CommonTestUtils::DEVICE_GPU)
+        config.insert(ov::hint::inference_precision(ov::element::f32));
+    const auto compiled_model = core.compile_model(model, device_name, config);
 
     ASSERT_TRUE(model->get_ops().size() >= compiled_model.get_runtime_model()->get_ops().size()); // decomposition not applied
 }
@@ -84,7 +94,10 @@ TEST_P(ExecGrapDecomposeNormalizeL2, CheckIfDecomposeNotAppliedForSingleAxis) {
     const auto model = std::make_shared<ov::Model>(ov::NodeVector{normalize_l2}, ov::ParameterVector{input});
 
     auto core = ov::Core();
-    const auto compiled_model = core.compile_model(model, device_name);
+    ov::AnyMap config;
+    if (device_name == CommonTestUtils::DEVICE_GPU)
+        config.insert(ov::hint::inference_precision(ov::element::f32));
+    const auto compiled_model = core.compile_model(model, device_name, config);
 
     ASSERT_TRUE(model->get_ops().size() >= compiled_model.get_runtime_model()->get_ops().size()); // decomposition not applied
 }
diff --git a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp
index 1d87c79abdd..132fb3efb05 100644
--- a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp
+++ b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp
@@ -216,6 +216,18 @@ void SubgraphBaseTest::compile_model() {
     }
 #endif
 
+    // Set inference_precision hint to run fp32 model in fp32 runtime precision as default plugin execution precision may vary
+    if (targetDevice == CommonTestUtils::DEVICE_GPU) {
+        ov::element::Type hint = ov::element::f32;
+        for (auto& param : function->get_parameters()) {
+            if (param->get_output_element_type(0) == ov::element::f16) {
+                hint = ov::element::f16;
+                break;
+            }
+        }
+        configuration.insert({ov::hint::inference_precision.name(), hint});
+    }
+
     compiledModel = core->compile_model(function, targetDevice, configuration);
     if (is_report_stages) {
         auto end_time = std::chrono::system_clock::now();
diff --git a/tests/layer_tests/common/layer_test_class.py b/tests/layer_tests/common/layer_test_class.py
index 3989bcd27d8..c1c26cd8fc6 100644
--- a/tests/layer_tests/common/layer_test_class.py
+++ b/tests/layer_tests/common/layer_test_class.py
@@ -76,6 +76,11 @@ class CommonLayerTest:
         # (flag, resp) = ir.compare(ref_net)
         # assert flag, '\n'.join(resp)
 
+        config = None
+        # GPU default execution precision is FP16, so if we want to check FP32 inference we need to set explicit precision hint
+        if ie_device == 'GPU' and precision == 'FP32':
+            config = {'INFERENCE_PRECISION_HINT' : 'f32'}
+
         if self.use_old_api:
             ie_engine = IEInfer(model=path_to_xml,
                                 weights=path_to_bin,
@@ -93,7 +98,7 @@ class CommonLayerTest:
         inputs_dict = self._prepare_input(ie_engine.get_inputs_info(precision))
 
         # IE infer:
-        infer_res = ie_engine.infer(input_data=inputs_dict, infer_timeout=infer_timeout)
+        infer_res = ie_engine.infer(input_data=inputs_dict, infer_timeout=infer_timeout, config=config)
 
         if hasattr(self, 'skip_framework') and self.skip_framework:
             warnings.warn('Framework is skipped')
diff --git a/tests/layer_tests/common/layer_utils.py b/tests/layer_tests/common/layer_utils.py
index 4cc43d3d075..c2e3152db08 100644
--- a/tests/layer_tests/common/layer_utils.py
+++ b/tests/layer_tests/common/layer_utils.py
@@ -23,14 +23,14 @@ class BaseInfer:
         self.name = name
         self.res = None
 
-    def fw_infer(self, input_data):
+    def fw_infer(self, input_data, config=None):
         raise RuntimeError("This is base class, please implement infer function for the specific framework")
 
     def get_inputs_info(self, precision) -> dict:
         raise RuntimeError("This is base class, please implement get_inputs_info function for the specific framework")
 
-    def infer(self, input_data, infer_timeout=10):
-        self.res = multiprocessing_run(self.fw_infer, [input_data], self.name, infer_timeout)
+    def infer(self, input_data, config=None, infer_timeout=10):
+        self.res = multiprocessing_run(self.fw_infer, [input_data, config], self.name, infer_timeout)
         return self.res
 
 
@@ -41,7 +41,7 @@ class IEInfer(BaseInfer):
         self.model = model
         self.weights = weights
 
-    def fw_infer(self, input_data):
+    def fw_infer(self, input_data, config=None):
         print("Inference Engine version: {}".format(ie_get_version()))
         print("Creating IE Core Engine...")
 
@@ -49,7 +49,7 @@ class IEInfer(BaseInfer):
         print("Reading network files")
         net = ie.read_network(self.model, self.weights)
         print("Loading network")
-        exec_net = ie.load_network(net, self.device)
+        exec_net = ie.load_network(net, self.device, config)
         print("Starting inference")
         result = exec_net.infer(input_data)
 
@@ -78,14 +78,14 @@ class InferAPI20(BaseInfer):
         self.weights = weights
         self.use_new_frontend = use_new_frontend
 
-    def fw_infer(self, input_data):
+    def fw_infer(self, input_data, config=None):
         print("Inference Engine version: {}".format(ie2_get_version()))
         print("Creating IE Core Engine...")
         ie = Core()
         print("Reading network files")
         net = ie.read_model(self.model, self.weights)
         print("Loading network")
-        exec_net = ie.compile_model(net, self.device)
+        exec_net = ie.compile_model(net, self.device, config)
         print("Starting inference")
         request = exec_net.create_infer_request()
         request_result = request.infer(input_data)
diff --git a/tests/layer_tests/common/onnx_layer_test_class.py b/tests/layer_tests/common/onnx_layer_test_class.py
index fa1ad08da50..2fd8eb7b20a 100644
--- a/tests/layer_tests/common/onnx_layer_test_class.py
+++ b/tests/layer_tests/common/onnx_layer_test_class.py
@@ -19,7 +19,7 @@ class OnnxRuntimeInfer(BaseInfer):
         super().__init__('OnnxRuntime')
         self.net = net
 
-    def fw_infer(self, input_data):
+    def fw_infer(self, input_data, config=None):
         import onnxruntime as rt
 
         sess = rt.InferenceSession(self.net)