[IE CLDNN] Removed unused primitives and related structures (#1039)

Vladimir Paramuzov 2020-06-30 22:18:24 +03:00 committed by GitHub
parent 66f620f97e
commit c9d4e6b934
301 changed files with 58 additions and 31335 deletions

View File

@@ -30,7 +30,6 @@
#include <api/detection_output.hpp>
#include <api/normalize.hpp>
#include <api/reshape.hpp>
#include <api/batch_norm.hpp>
#include <api/permute.hpp>
#include <api/split.hpp>
#include <api/resample.hpp>
@@ -1533,49 +1532,11 @@ void Program::CreateBatchNormalizationPrimitive(cldnn::topology& topology, Infer
cldnn::primitive_id weightID = bnLayerName + "_" + m_scalesTag;
cldnn::primitive_id biasID = bnLayerName + "_" + m_biasesTag;
#define _SCALE_BN_OPT
#ifdef _SCALE_BN_OPT
// Using scale as an optimization (1 mad instead of mad+rsq)
// create new blobs for scale shift
CreateScaleWeightsAndBiasesFromBN(topology, bnLayer, weightID, biasID);
auto scalePrim = cldnn::scale(bnLayerName, inputPrimitives[0], weightID, biasID);
topology.add(scalePrim);
#else
cldnn::tensor blobTensor(0);
const auto bnDims = bnLayer->outData[0]->getTensorDesc().getDims();
switch (bnDims.size()) {
case 2:
blobTensor = cldnn::feature(TensorValue(bnDims[1]));
break;
case 4:
blobTensor = cldnn::feature(TensorValue(bnDims[1]));
break;
default:
THROW_CLDNN_EXCEPTION("Batch normalization input doesn't have 2 or 4 dimensions in " << bnLayer->name);
}
cldnn::layout blobLayout(
DataTypeFromPrecision(layer->precision),
m_defaultFormat,
blobTensor);
// Create variance primitive
cldnn::primitive_id varianceID = bnLayerName + "_" + m_weightsTag;
varianceID = CreatePrimitiveFromBlob(topology, varianceID, bnLayer->_weights, blobLayout);
// Create mean primitive
cldnn::primitive_id meanID = bnLayerName + "_" + m_biasesTag;
meanID = CreatePrimitiveFromBlob(topology, meanID, bnLayer->_biases, blobLayout);
auto bnPrim = cldnn::batch_norm(
bnLayerName,
inputPrimitives[0],
meanID,
varianceID,
bnLayer->epsilon);
topology.add(bnPrim);
#endif // _SCALE_BN_OPT
AddPrimitiveToProfiler(bnLayerName, layer);
}
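The surviving code path above folds inference-time batch normalization into a single cldnn::scale primitive, which is what the "1 mad instead of mad+rsq" comment refers to. A minimal standalone sketch of that folding with illustrative values (this is not the plugin's CreateScaleWeightsAndBiasesFromBN implementation):

// Standalone sketch: fold inference-time batch normalization into scale/shift,
// i.e. y = (x - mean) / sqrt(var + eps)  ==>  y = x * w + b  (one mad per element).
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
    const float eps = 1e-5f;
    std::vector<float> mean = {0.5f, -1.0f};   // per-channel mean
    std::vector<float> var  = {4.0f,  0.25f};  // per-channel variance
    std::vector<float> w(mean.size()), b(mean.size());
    for (size_t c = 0; c < mean.size(); ++c) {
        w[c] = 1.0f / std::sqrt(var[c] + eps);  // precomputed scale
        b[c] = -mean[c] * w[c];                 // precomputed shift
    }
    // Applying BN to x is now a single multiply-add per element.
    float x = 2.0f;
    std::printf("channel0: bn(x) = %f\n", x * w[0] + b[0]);
    return 0;
}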

View File

@@ -1,22 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
// dllmain.cpp : Defines the entry point for the DLL application.
#ifdef _WIN32
#include <windows.h>
BOOL APIENTRY DllMain(HMODULE hModule,
DWORD ul_reason_for_call,
LPVOID lpReserved) {
switch (ul_reason_for_call) {
case DLL_PROCESS_ATTACH:
case DLL_THREAD_ATTACH:
case DLL_THREAD_DETACH:
case DLL_PROCESS_DETACH:
break;
}
return TRUE;
}
#endif

View File

@@ -71,13 +71,6 @@ enum class activation_func {
gelu // (0.5*val*(1 + erf(val / sqrt(2)))
};
/// @brief activation gradient functions
enum class activation_grad_func {
none, // val
relu, // val * (input > 0)
relu_negative_slope, // val * ((input > 0) + a * (input <= 0) (a is additional param)
};
/// @brief activation additional params
struct activation_additional_params {
float a, b;

View File

@@ -1,96 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include "activation.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Activation gradient for rectified linear unit or parameterized rectified linear unit.
/// @par Algorithm:
/// out(i,x,y) = input_gradient(i,x,y) * ((input(i,x,y) > 0) + slope(i) * (input(i,x,y) <= 0)
/// @par Where:
/// @li out(i,x,y) : value at x, y from i-th feature map after activation.
/// @li in(i,x,y) : value at x, y from i-th feature map before activation.
/// @li slope(i) : the slope value of the i-th feature map (can be shared across channels or one slope per channel).
struct activation_grad : public primitive_base<activation_grad> {
CLDNN_DECLARE_PRIMITIVE(activation_grad)
/// @brief Constructs Relu grad primitive.
/// @param id This primitive id.
/// @param input_grad Input gradient primitive id.
/// @param input Input primitive id.
/// @param activation_grad_func activation_grad function.
/// @param additional_params additional params (slope).
activation_grad(const primitive_id& id,
const primitive_id& input_grad,
const primitive_id& input,
activation_grad_func activation_grad_function,
activation_additional_params additional_params = {0.f, 0.f},
const padding& output_padding = padding())
: primitive_base(id, {input_grad, input}, output_padding),
activation_grad_function(activation_grad_function),
additional_params(additional_params),
additional_params_input("") {}
/// @brief Constructs Relu grad primitive.
/// @param id This primitive id.
/// @param input_grad Input gradient primitive id.
/// @param input Input primitive id.
/// @param activation_grad_func activation_grad function.
/// @param additional_params additional params (slope).
activation_grad(const primitive_id& id,
const primitive_id& input_grad,
const primitive_id& input,
const primitive_id& additional_params_input,
activation_grad_func activation_grad_function,
const padding& output_padding = padding())
: primitive_base(id, {input_grad, input}, output_padding),
activation_grad_function(activation_grad_function),
additional_params({0, 0}),
additional_params_input(additional_params_input) {}
/// @brief activation_grad function.
activation_grad_func activation_grad_function;
/// @brief activation_grad additional params.
activation_additional_params additional_params;
/// @brief PRelu activation slope input primitive id.
/// Input x dimension should be equal to input feature size (one slope per channel).
/// All other dimensions should be 1.
primitive_id additional_params_input;
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
if (additional_params_input.empty())
return {};
return {additional_params_input};
}
};
/// @}
/// @}
/// @}
} // namespace cldnn
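The removed activation_grad primitive documents its formula in the comment above. A minimal standalone sketch of that formula with illustrative values (not clDNN API code):

// Standalone sketch of the documented ReLU-gradient formula:
// out(i,x,y) = input_gradient(i,x,y) * ((input(i,x,y) > 0) + slope(i) * (input(i,x,y) <= 0))
#include <cstdio>

static float relu_grad(float input_grad, float input, float slope) {
    return input_grad * ((input > 0.0f ? 1.0f : 0.0f) + slope * (input <= 0.0f ? 1.0f : 0.0f));
}

int main() {
    std::printf("%f\n", relu_grad(0.5f, 2.0f, 0.1f));   // positive input: 0.5
    std::printf("%f\n", relu_grad(0.5f, -3.0f, 0.1f));  // negative input: 0.5 * 0.1 = 0.05
    return 0;
}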

View File

@@ -1,111 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Apply Adam primitive.
/// @details Updates output using Adam algorithm. The output of this primitive should be mutable_data type in case user wants to update
/// variable accross network. If output is not mutable_data then it will be initialized with 0.
/// "Adam: A Method for Stochastic Optimization" by Diederik P. Kingma, Jimmy Ba
/// @n See: https://arxiv.org/abs/1412.6980
///
/// <b>Algorithm:</b>
/// @n float lr[t] = lr * sqrt(1 - beta2^t) / (1 - beta1^t);
/// @n float m[t] = beta1 * m[t-1] + (1 - beta1) * grad[t];
/// @n float v[t] = beta2 * v[t-1] + (1 - beta2) * grad[t] * grad[t];
/// @n float result = result - lr[t] * m[t] / (sqrt(v[t]) + epsilon);
struct apply_adam : public primitive_base<apply_adam> {
CLDNN_DECLARE_PRIMITIVE(apply_adam)
/// @brief Constructs apply Adam primitive.
/// @param id This primitive id.
/// @param input Input gradient primitive id.
/// @param m Primitive id containing mean data.
/// @param v Primitive id containing variance.
/// @param beta1_power Primitive id containing beta1^t.
/// @param beta2_power Primitive id containing beta2^t.
/// @param lr Learning rate parameter.
/// @param beta1 Beta1 parameter.
/// @param beta2 Beta2 parameter.
/// @param epsilon Epsilon.
/// @param dependency_id Optional primitive id that need to complete before execution of this primitive. Used only for synchronization.
apply_adam(const primitive_id& id,
const primitive_id& input,
const primitive_id& m,
const primitive_id& v,
const primitive_id& beta1_power,
const primitive_id& beta2_power,
float lr,
float beta1,
float beta2,
float epsilon,
const primitive_id& dependency_id = "",
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
m(m),
v(v),
beta1_power(beta1_power),
beta2_power(beta2_power),
lr(lr),
beta1(beta1),
beta2(beta2),
epsilon(epsilon),
dependency_id(dependency_id) {}
/// @brief Primitive id containing m data.
primitive_id m;
/// @brief Primitive id containing v data.
primitive_id v;
/// @brief Primitive id containing beta1^t.
primitive_id beta1_power;
/// @brief Primitive id containing beta2^t.
primitive_id beta2_power;
/// @brief Learning rate parameter.
float lr;
/// @brief Beta1 parameter.
float beta1;
/// @brief Beta2 parameter.
float beta2;
/// @brief Epsilon.
float epsilon;
/// @brief Optional primitive id that need to complete before execution of this primitive. Used only for synchronization.
primitive_id dependency_id;
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
std::vector<std::reference_wrapper<const primitive_id>> ret{m, v, beta1_power, beta2_power};
ret.reserve(!dependency_id.empty());
if (!dependency_id.empty())
ret.push_back(dependency_id);
return ret;
}
};
/// @}
/// @}
/// @}
} // namespace cldnn
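The removed apply_adam primitive documents its update rule above. A minimal standalone sketch of those four equations for a single scalar parameter, with illustrative values (not clDNN API code):

// Standalone sketch of the Adam update documented above (single scalar parameter).
#include <cmath>
#include <cstdio>

int main() {
    float lr = 0.001f, beta1 = 0.9f, beta2 = 0.999f, epsilon = 1e-8f;
    float m = 0.0f, v = 0.0f, result = 1.0f;  // optimizer state and parameter
    float grad = 0.2f;                        // gradient for each step
    for (int t = 1; t <= 3; ++t) {
        float lr_t = lr * std::sqrt(1.0f - std::pow(beta2, t)) / (1.0f - std::pow(beta1, t));
        m = beta1 * m + (1.0f - beta1) * grad;
        v = beta2 * v + (1.0f - beta2) * grad * grad;
        result = result - lr_t * m / (std::sqrt(v) + epsilon);
        std::printf("t=%d result=%f\n", t, result);
    }
    return 0;
}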

View File

@@ -1,184 +0,0 @@
/*
// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Batch normalization primitive.
/// @details Performs batch normalization as discribed in
/// "Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift" by Ioffe, Szegedy
/// @n See: http://arxiv.org/abs/1502.03167
///
/// <b>Algorithm:</b>
/// @n global stats can be computed as:
/// @n out[i] = ( (in[i] - mean[b]) / sqrt(variance[b] + epsilon) ) * scale[b] + shift[b]
struct batch_norm : public primitive_base<batch_norm> {
CLDNN_DECLARE_PRIMITIVE(batch_norm)
/// @brief Constructs batch normalization primitive.
/// @param id This primitive id.
/// @param input Input primitive id.
/// @param mean Primitive id containing mean data.
/// @param variance Primitive id containing variance.
/// @param epsilon Epsilon.
batch_norm(const primitive_id& id,
const primitive_id& input,
const primitive_id& mean,
const primitive_id& variance,
float epsilon,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
mean(mean),
variance(variance),
inv_variance(""),
epsilon(epsilon) {}
/// @brief Constructs batch normalization primitive.
/// @param id This primitive id.
/// @param input Input primitive id.
/// @param mean Primitive id containing mean data.
/// @param variance Primitive id containing variance.
/// @brief scale Primitive id containing scale.
/// @brief shift Primitive id containing shift.
/// @param epsilon Epsilon.
batch_norm(const primitive_id& id,
const primitive_id& input,
const primitive_id& mean,
const primitive_id& variance,
const primitive_id& scale,
const primitive_id& shift,
float epsilon,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
mean(mean),
variance(variance),
scale(scale),
shift(shift),
inv_variance(""),
epsilon(epsilon) {}
/// @brief Constructs batch normalization primitive with mean and variance calculation (used for training).
/// @param id This primitive id.
/// @param input Input primitive id.
/// @param epsilon Epsilon.
/// @param inv_variance Primitive id containing inverted variance calculated in this primitive. For inference leave empty.
batch_norm(const primitive_id& id,
const primitive_id& input,
float epsilon,
const primitive_id& inv_variance = "",
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
mean(""),
variance(""),
inv_variance(inv_variance),
epsilon(epsilon) {}
/// @brief Constructs batch normalization primitive with mean and variance calculation (used for training).
/// @param id This primitive id.
/// @param input Input primitive id.
/// @brief scale Primitive id containing scale.
/// @brief shift Primitive id containing shift.
/// @param epsilon Epsilon.
/// @param inv_variance Primitive id containing inverted variance calculated in this primitive. For inference leave empty.
batch_norm(const primitive_id& id,
const primitive_id& input,
float epsilon,
const primitive_id& scale,
const primitive_id& shift,
const primitive_id& inv_variance = "",
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
mean(""),
variance(""),
scale(scale),
shift(shift),
inv_variance(inv_variance),
epsilon(epsilon) {}
/// @brief Constructs batch normalization primitive with mean and variance calculation (used for training).
/// @param id This primitive id.
/// @param input Input primitive id.
/// @brief scale Primitive id containing scale.
/// @brief shift Primitive id containing shift.
/// @brief mean_out Primitive id containing mean output.
/// @brief variance_out Primitive id containing variance output.
/// @param epsilon Epsilon.
/// @param inv_variance Primitive id containing inverted variance calculated in this primitive. For inference leave empty.
batch_norm(const primitive_id& id,
const primitive_id& input,
float epsilon,
const primitive_id& mean_out,
const primitive_id& variance_out,
const primitive_id& scale,
const primitive_id& shift,
const primitive_id& inv_variance = "",
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
mean(mean_out),
variance(variance_out),
scale(scale),
shift(shift),
inv_variance(inv_variance),
epsilon(epsilon) {}
/// @brief Primitive id containing mean data.
primitive_id mean;
/// @brief Primitive id containing variance.
primitive_id variance;
/// @brief Primitive id containing scale.
primitive_id scale;
/// @brief Primitive id containing shift.
primitive_id shift;
/// @brief Primitive id containing inverted variance used in future gradient computing.
primitive_id inv_variance;
/// @brief Epsilon.
float epsilon;
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
std::vector<std::reference_wrapper<const primitive_id>> deps;
if (!mean.empty() && !variance.empty()) {
deps.push_back(mean);
deps.push_back(variance);
}
if (!scale.empty() && !shift.empty()) {
deps.push_back(scale);
deps.push_back(shift);
}
if (!inv_variance.empty())
deps.push_back(inv_variance);
return deps;
}
};
/// @}
/// @}
/// @}
} // namespace cldnn
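The removed batch_norm primitive documents its inference formula above. A minimal standalone sketch of that formula for one feature map, with illustrative values (not clDNN API code):

// Standalone sketch of the documented inference formula:
// out[i] = ((in[i] - mean[f]) / sqrt(variance[f] + epsilon)) * scale[f] + shift[f]
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
    const float epsilon = 1e-5f;
    std::vector<float> in = {1.0f, 2.0f, 3.0f};
    float mean = 2.0f, variance = 0.5f, scale = 1.5f, shift = 0.1f;  // one feature map
    for (float x : in) {
        float out = ((x - mean) / std::sqrt(variance + epsilon)) * scale + shift;
        std::printf("%f -> %f\n", x, out);
    }
    return 0;
}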

View File

@@ -1,61 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Performs backward batch normalization layer.
/// @details Calculates mean gradient and gradient * input for every feature in data,
/// then output is calculated as inv_variance * (input_grad - mean_grad_input * input - mean_grad)
struct batch_norm_grad : public primitive_base<batch_norm_grad> {
CLDNN_DECLARE_PRIMITIVE(batch_norm_grad)
/// @brief Constructs batch normalization backward layer.
/// @param id This primitive id.
/// @param input_grad Input gradient primitive id.
/// @param input Input primitive id.
/// @param inv_variance Primitive id containing inverted variance from forward pass.
batch_norm_grad(
const primitive_id& id,
const primitive_id& input_grad,
const primitive_id& input,
const primitive_id& inv_variance,
const padding& output_padding = padding())
: primitive_base(id, {input_grad, input}, output_padding), inv_variance(inv_variance) {
}
/// @brief Primitive id containing inverted variance from forward pass.
primitive_id inv_variance;
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
return {inv_variance};
}
};
/// @}
/// @}
/// @}
} // namespace cldnn
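The removed batch_norm_grad primitive describes its output above. A minimal standalone sketch of one reading of that description for a single feature map, where mean_grad and mean_grad_input are taken as per-feature means of the gradient and of gradient times input; that interpretation is an assumption, not something the header spells out:

// Standalone sketch of the documented output:
// out = inv_variance * (input_grad - mean_grad_input * input - mean_grad)
#include <cstdio>
#include <vector>

int main() {
    std::vector<float> input      = {1.0f, -2.0f, 0.5f, 3.0f};
    std::vector<float> input_grad = {0.1f,  0.2f, -0.1f, 0.3f};
    float inv_variance = 0.8f;
    float mean_grad = 0.0f, mean_grad_input = 0.0f;  // per-feature means (assumed)
    for (size_t i = 0; i < input.size(); ++i) {
        mean_grad       += input_grad[i];
        mean_grad_input += input_grad[i] * input[i];
    }
    mean_grad       /= input.size();
    mean_grad_input /= input.size();
    for (size_t i = 0; i < input.size(); ++i) {
        float out = inv_variance * (input_grad[i] - mean_grad_input * input[i] - mean_grad);
        std::printf("%f\n", out);
    }
    return 0;
}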

View File

@@ -1,95 +0,0 @@
// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Select mode for the @ref contract layer.
enum class contract_mode : int32_t {
/// @brief Sum reduction.
sum,
/// @brief Product reduction.
prod,
/// @brief All reduction.
all,
/// @brief Any reduction.
any,
/// @brief Max reduction.
max
};
/// @brief Reduces input with an operation defined by @p mode along defined
/// by @p reduction_axes dimensions.
///
/// @details Reduces the input using the binary operation determined by
/// @p mode. The @p reduction_axes determine the final shape of the
/// output, which is calculated based on the input shape by
/// collapsing the dimensions along which the reduction happens.
/// For example, for the input with
/// @n <tt>input_sizes = (in_b, in_f, in_y, in_x)</tt>
/// @n a reduction with
/// @n <tt>reduction_axes = (2)</tt>
/// @n would collapse the Y dimension, producing
/// @n <tt>output_shape = (1, in_b, in_f, in_x)</tt>
/// @n where every element is a @p mode reduction of the input elements with
/// @n the same B, F and X coordinates.
/// @n
/// @n@b Requirements:
/// @n - @p reduction_axes size (dimensions count) must be within (inclusive) range
/// 1 - 4.
/// @n - @p reduction_axes mustn't have duplicate values.
/// @n - Values of @p reduction_axes must be within (inclusive) range 0 - 3
/// @n Breaking any of these conditions will raise an exception.
struct contract : public primitive_base<contract> {
CLDNN_DECLARE_PRIMITIVE(contract)
/// @brief Constructs contract primitive / layer.
///
/// @param id An identifier of new primitive.
/// @param input An identifier of primitive which is an input for newly created
/// contract primitive.
/// @param mode Reduction mode.
/// @param reduction_axes Axes positions (0-based, from left to right) in input_shape
/// that are being reduced.
/// @param output_padding Optional padding for output from primitive.
contract(
const primitive_id& id,
const primitive_id& input,
contract_mode mode,
const std::vector<uint16_t>& reduction_axes = {},
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
mode(mode),
reduction_axes(reduction_axes) {
}
/// @param mode Contract mode.
contract_mode mode;
/// @brief Array of axes positions from input shape (0-based, from left to right)
/// along which reduction should happen.
std::vector<uint16_t> reduction_axes;
};
/// @}
/// @}
/// @}
} // namespace cldnn
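The removed contract primitive documents Y-axis reduction above. A minimal standalone sketch of a sum reduction over the Y axis of a small bfyx buffer, with illustrative sizes (not clDNN API code):

// Standalone sketch of the documented reduction: collapsing the Y axis of a
// (b, f, y, x) tensor with a sum, so every output element is the sum over Y
// of the inputs sharing the same b, f and x coordinates.
#include <cstdio>
#include <vector>

int main() {
    const int B = 1, F = 2, Y = 3, X = 2;
    std::vector<float> in(B * F * Y * X);
    for (size_t i = 0; i < in.size(); ++i) in[i] = static_cast<float>(i);  // bfyx layout

    std::vector<float> out(B * F * X, 0.0f);  // Y collapsed
    for (int b = 0; b < B; ++b)
        for (int f = 0; f < F; ++f)
            for (int y = 0; y < Y; ++y)
                for (int x = 0; x < X; ++x)
                    out[(b * F + f) * X + x] += in[((b * F + f) * Y + y) * X + x];

    for (float v : out) std::printf("%f ", v);
    std::printf("\n");
    return 0;
}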

View File

@@ -1,95 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "deconvolution.hpp"
#include "primitive.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Performs backward convolution operation for input.
/// @details convolution_grad_input is similar to deconvolution layer without biases and activation support.
/// It actually uses deconvolution primitive underneath with gradient bool set to true.
struct convolution_grad_input : public deconvolution {
/// @brief Constructs convolution_grad_input primitive.
/// @param id This primitive id.
/// @param input Input primitive id.
/// @param weights List of primitive ids containing weights data.
/// @param input_offset Defines a shift, relative to (0,0) position of the input buffer,
/// where (0,0) point of the convolution_grad_input window should start calculations.
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
/// @param with_activation Enables Relu activation.
/// @param activation_slp Relu activation slope.
convolution_grad_input(const primitive_id& id,
const primitive_id& input,
const std::vector<primitive_id>& weights,
tensor stride = {1, 1, 1, 1},
tensor input_offset = {0, 0, 0, 0},
const padding& output_padding = padding())
: deconvolution(id, input, {weights}, stride, input_offset, output_padding, true) {}
/// @brief Constructs convolution_grad_input primitive (computes input paddings to match output size).
/// @param id This primitive id.
/// @param input Input primitive id.
/// @param weights List of primitive ids containing weights data.
/// @param input_offset Defines a shift, relative to (0,0) position of the input buffer,
/// where (0,0) point of the convolution_grad_input window should start calculations.
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
/// @param with_activation Enables Relu activation.
/// @param activation_slp Relu activation slope.
/// @param output_size User-defined output data size of the primitive (w/o padding).
convolution_grad_input(const primitive_id& id,
const primitive_id& input,
const std::vector<primitive_id>& weights,
tensor stride,
tensor input_offset,
tensor output_size,
const padding& output_padding = padding())
: deconvolution(id, input, {weights}, stride, input_offset, output_size, output_padding, true) {}
/// @brief Constructs convolution_grad_input primitive (computes input paddings to match output size).
/// @param id This primitive id.
/// @param input Input primitive id.
/// @param weights List of primitive ids containing weights data.
/// @param input_offset Defines a shift, relative to (0,0) position of the input buffer,
/// where (0,0) point of the convolution_grad_input window should start calculations.
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
/// @param with_activation Enables Relu activation.
/// @param activation_slp Relu activation slope.
/// @param output_size User-defined output data size of the primitive (w/o padding).
/// @return convolution_grad_input primitive with specified settings.
static convolution_grad_input create_with_output_size(const primitive_id& id,
const primitive_id& input,
const std::vector<primitive_id>& weights,
tensor output_size,
tensor stride = {1, 1, 1, 1},
tensor input_offset = {0, 0, 0, 0},
const padding& output_padding = padding()) {
return convolution_grad_input(id, input, weights, stride, input_offset, output_size, output_padding);
}
};
/// @}
/// @}
/// @}
} // namespace cldnn

View File

@@ -1,217 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Performs backward convolution operation for weights and biases.
/// @details convolution_grad_weights updates weights and bias mutable data for training purposes.
/// @details Please note that this primitive was not heavily tested and currently only batch=1 is enabled for this primitive.
struct convolution_grad_weights
: public primitive_base<convolution_grad_weights> {
CLDNN_DECLARE_PRIMITIVE(convolution_grad_weights)
/// @brief Constructs convolution_grad_weights primitive.
/// @param id This primitive id.
/// @param input Input gradient primitive id.
/// @param input Input primitive id from convolution forward pass.
/// @param weights List of primitive ids containing weights data.
/// @param bias List of primitive ids containing bias data. Provide empty vector if using next parameters without bias.
/// @param input_offset Defines a shift, relative to (0,0) position of the input buffer,
/// where (0,0) point of the convolution_grad_weights window should start calculations.
/// @param dilation Defines dilation size.
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
/// @param conv_grad Id of primitive which uses weights and biases updated in this primitive.
/// This is for correct order of calculating. Leave empty if primitive is last in backward pass.
convolution_grad_weights(const primitive_id& id,
const primitive_id& input_grad,
const primitive_id& input,
const std::vector<primitive_id>& weights,
const std::vector<primitive_id>& bias,
tensor stride = {1, 1, 1, 1},
tensor input_offset = {0, 0, 0, 0},
tensor dilation = {1, 1, 1, 1},
const primitive_id& conv_grad = "",
const padding& output_padding = padding())
: primitive_base(id, {input_grad, input}, output_padding),
conv_grad(conv_grad),
stride(stride),
input_offset(input_offset),
dilation(dilation),
output_grad_w(false),
weights(weights),
bias(bias),
prev_weights_grad(std::vector<primitive_id>(0)),
prev_bias_grad(std::vector<primitive_id>(0)) {}
/// @brief Constructs convolution_grad_weights primitive (w/o bias).
/// @param id This primitive id.
/// @param input Input gradient primitive id.
/// @param input Input primitive id from convolution forward pass.
/// @param weights List of primitive ids containing weights data.
/// @param input_offset Defines a shift, relative to (0,0) position of the input buffer,
/// where (0,0) point of the convolution_grad_weights window should start calculations.
/// @param dilation Defines dilation size.
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
/// @param Should primitive give weights gradient (delta) as an output
/// @param conv_grad Id of primitive which uses weights and biases updated in this primitive.
/// This is for correct order of calculating. Leave empty if primitive is last in backward pass.
convolution_grad_weights(const primitive_id& id,
const primitive_id& input_grad,
const primitive_id& input,
const std::vector<primitive_id>& weights,
tensor stride = {1, 1, 1, 1},
tensor input_offset = {0, 0, 0, 0},
tensor dilation = {1, 1, 1, 1},
bool output_grad_w = false,
const primitive_id& conv_grad = "",
const padding& output_padding = padding())
: primitive_base(id, {input_grad, input}, output_padding),
conv_grad(conv_grad),
stride(stride),
input_offset(input_offset),
dilation(dilation),
output_grad_w(output_grad_w),
weights(weights),
bias(std::vector<primitive_id>(0)),
prev_weights_grad(std::vector<primitive_id>(0)),
prev_bias_grad(std::vector<primitive_id>(0)) {}
/// @brief Constructs convolution_grad_weights primitive (w/o bias).
/// @param id This primitive id.
/// @param input Input gradient primitive id.
/// @param input Input primitive id from convolution forward pass.
/// @param weights List of primitive ids containing weights data.
/// @param input_offset Defines a shift, relative to (0,0) position of the input buffer,
/// where (0,0) point of the convolution_grad_weights window should start calculations.
/// @param dilation Defines dilation size.
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
/// @param conv_grad Id of primitive which uses weights and biases updated in this primitive.
/// This is for correct order of calculating. Leave empty if primitive is last in backward pass.
convolution_grad_weights(const primitive_id& id,
const primitive_id& input_grad,
const primitive_id& input,
const std::vector<primitive_id>& weights,
tensor stride,
tensor input_offset,
tensor dilation,
const primitive_id& conv_grad = "",
const padding& output_padding = padding())
: primitive_base(id, {input_grad, input}, output_padding),
conv_grad(conv_grad),
stride(stride),
input_offset(input_offset),
dilation(dilation),
output_grad_w(false),
weights(weights),
bias(std::vector<primitive_id>(0)),
prev_weights_grad(std::vector<primitive_id>(0)),
prev_bias_grad(std::vector<primitive_id>(0)) {}
/// @brief Constructs convolution_grad_weights primitive with momentum optimizer.
/// @param id This primitive id.
/// @param input Input gradient primitive id.
/// @param input Input primitive id from convolution forward pass.
/// @param weights List of primitive ids containing weights data.
/// @param bias List of primitive ids containing bias data. Provide empty vector if using next parameters without bias.
/// @param prev_weights_grad List of primitive ids which contains weights gradient data calculated in previous iteration. Used in momentum optimizer.
/// @param prev_bias_grad List of primitive ids which contains bias gradient data calculated in previous iteration. Used in momentum optimizer.
/// @param input_offset Defines a shift, relative to (0,0) position of the input buffer,
/// where (0,0) point of the convolution_grad_weights window should start calculations.
/// @param dilation Defines dilation size.
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
/// @param conv_grad Id of primitive which uses weights and biases updated in this primitive.
/// This is for correct order of calculating. Leave empty if primitive is last in backward pass.
convolution_grad_weights(const primitive_id& id,
const primitive_id& input_grad,
const primitive_id& input,
const std::vector<primitive_id>& weights,
const std::vector<primitive_id>& bias,
const std::vector<primitive_id>& prev_weights_grad,
const std::vector<primitive_id>& prev_bias_grad,
tensor stride = {1, 1, 1, 1},
tensor input_offset = {0, 0, 0, 0},
tensor dilation = {1, 1, 1, 1},
const primitive_id& conv_grad = "",
const padding& output_padding = padding())
: primitive_base(id, {input_grad, input}, output_padding),
conv_grad(conv_grad),
stride(stride),
input_offset(input_offset),
dilation(dilation),
output_grad_w(false),
weights(weights),
bias(bias),
prev_weights_grad(prev_weights_grad),
prev_bias_grad(prev_bias_grad) {}
/// @brief Primitive id containing convolution gradient data.
primitive_id conv_grad;
/// @brief Defines shift in input buffer between adjacent calculations of output values.
tensor stride;
/// @brief Defines a shift, relative to (0,0) position of the input buffer,
/// where (0,0) point of the convolution_grad_weights window should start calculations.
tensor input_offset;
/// @brief Defines gaps in the input - dilation rate k=1 is normal convolution, k=2 means skipping one pixel per input, k=4 means skipping 3 pixels.
/// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1.
/// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4].
tensor dilation;
/// @brief Should primitive give weights gradient (delta) as an output
bool output_grad_w;
/// @brief List of primitive ids containing weights data.
const primitive_id_arr weights;
/// @brief List of primitive ids containing bias data.
const primitive_id_arr bias;
/// @brief Array of primitive ids containing weights gradient data calculated in previous iteration.
/// Amount of primitives and their memory sizes should be same as weights.
const primitive_id_arr prev_weights_grad;
/// @brief Array of primitive ids containing bias gradient data calculated in previous iteration.
/// Amount of primitives and their memory sizes should be same as biases.
const primitive_id_arr prev_bias_grad;
/// @brief On how many cards split the computation to.
int32_t split() const { return static_cast<int32_t>(weights.size()); }
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
std::vector<std::reference_wrapper<const primitive_id>> ret;
ret.reserve(weights.size() + bias.size() + !conv_grad.empty() + prev_weights_grad.size() +
prev_bias_grad.size());
for (auto& w : weights) ret.push_back(std::ref(w));
for (auto& b : bias) ret.push_back(std::ref(b));
for (auto& g : prev_weights_grad) ret.push_back(std::ref(g));
for (auto& g : prev_bias_grad) ret.push_back(std::ref(g));
if (!conv_grad.empty())
ret.push_back(conv_grad);
return ret;
}
};
/// @}
/// @}
/// @}
} // namespace cldnn
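The dilation comment above gives a one-dimensional example. A minimal standalone sketch of exactly that example, with illustrative weights (not clDNN API code):

// Standalone sketch of the dilation example in the comment above: a 1-D filter
// of size 3 applied at position 0 with dilation 1 and dilation 2.
#include <cstdio>

int main() {
    float x[] = {1, 2, 3, 4, 5};
    float w[] = {0.5f, 1.0f, -1.0f};
    float d1 = w[0] * x[0] + w[1] * x[1] + w[2] * x[2];  // dilation 1: adjacent inputs
    float d2 = w[0] * x[0] + w[1] * x[2] + w[2] * x[4];  // dilation 2: every other input
    std::printf("dilation 1: %f, dilation 2: %f\n", d1, d2);
    return 0;
}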

View File

@@ -56,8 +56,7 @@ struct deconvolution : public primitive_base<deconvolution> {
with_output_size(false),
groups(1),
weights(weights),
bias(bias),
_gradient(false) {}
bias(bias) {}
/// @brief Constructs deconvolution primitive.
/// @param id This primitive id.
/// @param input Input primitive id.
@@ -83,8 +82,7 @@ struct deconvolution : public primitive_base<deconvolution> {
with_output_size(false),
groups(groups),
weights(weights),
bias(bias),
_gradient(false) {}
bias(bias) {}
/// @brief Constructs deconvolution primitive (w/o bias).
/// @param id This primitive id.
@@ -100,16 +98,14 @@ struct deconvolution : public primitive_base<deconvolution> {
const std::vector<primitive_id>& weights,
tensor stride = {1, 1, 1, 1},
tensor input_offset = {0, 0, 0, 0},
const padding& output_padding = padding(),
bool gradient = false)
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
input_offset(input_offset),
stride(stride),
with_output_size(false),
groups(1),
weights(weights),
bias(std::vector<primitive_id>(0)),
_gradient(gradient) {}
bias(std::vector<primitive_id>(0)) {}
/// @brief Constructs deconvolution primitive (w/o bias).
/// @param id This primitive id.
@@ -127,16 +123,14 @@ struct deconvolution : public primitive_base<deconvolution> {
uint32_t groups,
tensor stride = {1, 1, 1, 1},
tensor input_offset = {0, 0, 0, 0},
const padding& output_padding = padding(),
bool gradient = false)
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
input_offset(input_offset),
stride(stride),
with_output_size(false),
groups(groups),
weights(weights),
bias(std::vector<primitive_id>(0)),
_gradient(gradient) {}
bias(std::vector<primitive_id>(0)) {}
/// @brief Constructs deconvolution primitive (computes input paddings to match output size).
/// @param id This primitive id.
@@ -164,8 +158,7 @@ struct deconvolution : public primitive_base<deconvolution> {
output_size(output_size),
groups(1),
weights(weights),
bias(bias),
_gradient(false) {}
bias(bias) {}
/// @brief Constructs deconvolution primitive (computes input paddings to match output size).
/// @param id This primitive id.
@@ -195,8 +188,7 @@ struct deconvolution : public primitive_base<deconvolution> {
output_size(output_size),
groups(groups),
weights(weights),
bias(bias),
_gradient(false) {}
bias(bias) {}
/// @brief Constructs deconvolution primitive (w/o bias, computes input paddings to match output size).
/// @param id This primitive id.
@@ -214,8 +206,7 @@ struct deconvolution : public primitive_base<deconvolution> {
tensor stride,
tensor input_offset,
tensor output_size,
const padding& output_padding = padding(),
bool gradient = false)
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
input_offset(input_offset),
stride(stride),
@@ -223,8 +214,7 @@ struct deconvolution : public primitive_base<deconvolution> {
output_size(output_size),
groups(1),
weights(weights),
bias(std::vector<primitive_id>(0)),
_gradient(gradient) {}
bias(std::vector<primitive_id>(0)) {}
/// @brief Constructs deconvolution primitive (computes input paddings to match output size).
/// @param id This primitive id.
@@ -300,12 +290,8 @@ struct deconvolution : public primitive_base<deconvolution> {
/// @brief On how many cards split the computation to.
int32_t split() const { return static_cast<int32_t>(weights.size()); }
/// @brief Indicates that deconvolution is used for convolution backward computation (convolution_grad_input)
bool gradient() const { return _gradient; }
protected:
bool _gradient;
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
std::vector<std::reference_wrapper<const primitive_id>> ret;
ret.reserve(weights.size() + bias.size());

View File

@@ -92,13 +92,9 @@ struct eltwise : public primitive_base<eltwise> {
eltwise_mode mode,
const padding& output_padding = padding())
: primitive_base(id, {input, input2}, output_padding),
output_calibration_factors(""),
output_quantization_factor(1.0f),
input_quantization_factors(0),
mode(mode),
coefficients(std::vector<float>(0)),
stride(std::vector<tensor>(0)),
inputs_calibration_factors(std::vector<primitive_id>(0)) {}
stride(std::vector<tensor>(0)) {}
/// @brief Constructs eltwise primitive.
/// @param id This primitive id.
@@ -115,13 +111,9 @@ struct eltwise : public primitive_base<eltwise> {
eltwise_mode mode,
const padding& output_padding = padding())
: primitive_base(id, {input, input2}, output_padding),
output_calibration_factors(""),
output_quantization_factor(1.0f),
input_quantization_factors(0),
mode(mode),
coefficients(std::vector<float>(0)),
stride(stride),
inputs_calibration_factors(std::vector<primitive_id>(0)) {}
stride(stride) {}
/// @brief Constructs eltwise primitive.
/// @param id This primitive id.
@@ -134,13 +126,9 @@ struct eltwise : public primitive_base<eltwise> {
data_types data_type,
const padding& output_padding = padding())
: primitive_base(id, inputs, output_padding, optional_data_type{data_type}),
output_calibration_factors(""),
output_quantization_factor(1.0f),
input_quantization_factors(0),
mode(mode),
coefficients(std::vector<float>(0)),
stride(std::vector<tensor>(0)),
inputs_calibration_factors(std::vector<primitive_id>(0)) {}
stride(std::vector<tensor>(0)) {}
/// @brief Constructs eltwise primitive.
/// @param id This primitive id.
@@ -151,13 +139,9 @@ struct eltwise : public primitive_base<eltwise> {
eltwise_mode mode,
const padding& output_padding = padding())
: primitive_base(id, inputs, output_padding),
output_calibration_factors(""),
output_quantization_factor(1.0f),
input_quantization_factors(0),
mode(mode),
coefficients(std::vector<float>(0)),
stride(std::vector<tensor>(0)),
inputs_calibration_factors(std::vector<primitive_id>(0)) {}
stride(std::vector<tensor>(0)) {}
/// @brief Constructs eltwise primitive.
/// @param id This primitive id.
@@ -171,13 +155,9 @@ struct eltwise : public primitive_base<eltwise> {
data_types data_type,
const padding& output_padding = padding())
: primitive_base(id, inputs, output_padding, optional_data_type{data_type}),
output_calibration_factors(""),
output_quantization_factor(1.0f),
input_quantization_factors(0),
mode(mode),
coefficients(coefficients),
stride(std::vector<tensor>(0)),
inputs_calibration_factors(std::vector<primitive_id>(0)) {
stride(std::vector<tensor>(0)) {
if (mode == eltwise_mode::sum && !coefficients.empty() && coefficients.size() != inputs.size()) {
throw std::invalid_argument("Invalid eltwise sum coefficients count (should be equal to 0 or input.size)");
}
@@ -186,31 +166,12 @@
}
}
/// @brief Primitive id containing output quanitization factors per output feature map.
primitive_id output_calibration_factors;
/// @brief Output quantization factor
float output_quantization_factor;
/// @brief List of quantization factors per input.
std::vector<float> input_quantization_factors;
/// @param mode Eltwise mode.
eltwise_mode mode;
/// @param coefficients Blob-wise coefficient for SUM operation.
std::vector<float> coefficients;
/// @brief Defines shift in input buffers between adjacent calculations of output values.
std::vector<tensor> stride;
/// @brief List of primitive ids containing input quantization factors per feature map, one primitive id for each input.
const primitive_id_arr inputs_calibration_factors;
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
std::vector<std::reference_wrapper<const primitive_id>> ret;
if (!output_calibration_factors.empty())
ret.push_back(output_calibration_factors);
for (auto& icf : inputs_calibration_factors) ret.push_back(std::ref(icf));
return ret;
}
};
/// @}
/// @}

View File

@@ -1,79 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief
/// @details Performs embedding upon input.
/// @n\b Example:
/// @n input_size = { 8, 1, 1, 75 };
/// @n weights_size = {15, 1, 62, 1 };
/// @n output_size = { 8, 75, 15, 1 };
/// @par Algorithm:
/// @par Where:
struct embed : public primitive_base<embed> {
CLDNN_DECLARE_PRIMITIVE(embed)
/// @brief Constructs embed primitive.
/// @param id This primitive id.
/// @param input Input primitive id.
/// @param weights Primitive id containing weights data.
/// @param bias Primitive id containing bias data.
embed(
const primitive_id& id,
const primitive_id& input,
const primitive_id& weights,
const primitive_id& bias)
: primitive_base(id, {input}), weights(weights), bias(bias) {}
/// @brief Constructs embed primitive.
/// @param id This primitive id.
/// @param input Input primitive id.
embed(
const primitive_id& id,
const primitive_id& input,
const primitive_id& weights)
: primitive_base(id, {input}), weights(weights), bias("") {}
/// @brief Primitive id containing weights data.
primitive_id weights;
/// @brief Primitive id containing bias data.
primitive_id bias;
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
if (bias.empty())
return {weights};
else
return {weights, bias};
}
};
/// @}
/// @}
/// @}
} // namespace cldnn
#pragma once
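The removed embed primitive only sketches its shapes above. A minimal standalone sketch assuming the usual lookup-table semantics, where each integer index in the input selects a row of the weights and the optional bias is added; that reading is an assumption rather than something the header states:

// Standalone sketch of a lookup-table embedding with an added bias (assumed semantics).
#include <cstdio>
#include <vector>

int main() {
    const int vocab = 4, dim = 3;
    std::vector<float> weights(vocab * dim);
    for (size_t i = 0; i < weights.size(); ++i) weights[i] = static_cast<float>(i);
    std::vector<float> bias(dim, 0.1f);
    std::vector<int> input = {2, 0, 3};  // token indices

    for (int idx : input) {
        std::printf("index %d -> ", idx);
        for (int d = 0; d < dim; ++d)
            std::printf("%f ", weights[idx * dim + d] + bias[d]);
        std::printf("\n");
    }
    return 0;
}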

View File

@@ -1,59 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Performs backward fully connected layer (inner product) for input.
struct fully_connected_grad_input : public primitive_base<fully_connected_grad_input> {
CLDNN_DECLARE_PRIMITIVE(fully_connected_grad_input)
/// @brief Constructs fully connected layer grad for input.
/// @param id This primitive id.
/// @param input_grad Input gradient primitive id.
/// @param input Input primitive id.
/// @param weights Primitive id containing weights data.
fully_connected_grad_input(
const primitive_id& id,
const primitive_id& input_grad,
const primitive_id& input,
const primitive_id& weights,
const padding& output_padding = padding())
: primitive_base(id, {input_grad, input}, output_padding), weights(weights) {
}
/// @brief Primitive id containing weights data.
primitive_id weights;
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
return {weights};
}
};
/// @}
/// @}
/// @}
} // namespace cldnn

View File

@@ -1,115 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Performs backward fully connected layer (inner product) for weights and biases.
struct fully_connected_grad_weights
: public primitive_base<fully_connected_grad_weights> {
CLDNN_DECLARE_PRIMITIVE(fully_connected_grad_weights)
/// @brief Constructs fully connected layer for weights and biases.
/// @param id This primitive id.
/// @param input Input gradient primitive id.
/// @param input Input primitive id.
/// @param weights Primitive id containing weights data.
/// @param bias Primitive id containing bias data. Provide empty string if using Relu without bias.
/// @param fc_grad Id of primitive which uses weights and biases updated in this primitive.
/// This is for correct order of calculating. Leave empty if primitive is last in backward pass.
fully_connected_grad_weights(const primitive_id& id,
const primitive_id& input_grad,
const primitive_id& input,
const primitive_id& weights,
const primitive_id& bias = "",
const primitive_id& fc_grad = "",
const padding& output_padding = padding())
: primitive_base(id, {input_grad, input}, output_padding),
weights(weights),
bias(bias),
fc_grad(fc_grad),
prev_weights_grad(""),
prev_bias_grad("") {}
/// @brief Constructs fully connected layer for weights and biases with momentum optimizer.
/// @param id This primitive id.
/// @param input Input gradient primitive id.
/// @param input Input primitive id.
/// @param weights Primitive id containing weights data.
/// @param bias Primitive id containing bias data. Provide empty string if using Relu without bias.
/// @param prev_weights_grad Id of primitive which contains weights gradient data calculated in previous iteration. Used in momentum optimizer.
/// @param prev_bias_grad Id of primitive which contains bias gradient data calculated in previous iteration. Used in momentum optimizer.
/// @param fc_grad Id of primitive which uses weights and biases updated in this primitive. This is for correct order of calculating.
fully_connected_grad_weights(const primitive_id& id,
const primitive_id& input_grad,
const primitive_id& input,
const primitive_id& weights,
const primitive_id& bias,
const primitive_id& prev_weights_grad,
const primitive_id& prev_bias_grad,
const primitive_id& fc_grad = "",
const padding& output_padding = padding())
: primitive_base(id, {input_grad, input}, output_padding),
weights(weights),
bias(bias),
fc_grad(fc_grad),
prev_weights_grad(prev_weights_grad),
prev_bias_grad(prev_bias_grad) {}
/// @brief Primitive id containing weights data.
primitive_id weights;
/// @brief Primitive id containing bias data.
primitive_id bias;
/// @brief Primitive id containing fully connected gradient data.
primitive_id fc_grad;
/// @brief Id of primitive containing weights gradient data calculated in previous iteration. It's memory size should be same as weights.
primitive_id prev_weights_grad;
/// @brief Id of primitive containing bias gradient data calculated in previous iteration. It's memory size should be same as biases.
primitive_id prev_bias_grad;
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
std::vector<std::reference_wrapper<const primitive_id>> ret;
ret.reserve(1 + !bias.empty() + !fc_grad.empty() + !prev_weights_grad.empty() + !prev_bias_grad.empty());
ret.push_back(weights);
if (!bias.empty())
ret.push_back(bias);
if (!prev_weights_grad.empty())
ret.push_back(prev_weights_grad);
if (!prev_bias_grad.empty())
ret.push_back(prev_bias_grad);
if (!fc_grad.empty())
ret.push_back(fc_grad);
return ret;
}
};
/// @}
/// @}
/// @}
} // namespace cldnn

View File

@ -1,109 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>
namespace cldnn {
/// @brief Axis which index_select primitive will index.
enum class index_select_axis_name {
along_b,
along_f,
along_y,
along_x
};
/// @brief Selects indices which will be copied to the output.
///
/// @details Applies index selection along the specified dimension. The indices which will be copied
/// are specified by @c indices.
/// @n
/// @n Example:
/// @n <tt>input_sizes = (1, 2, 4, 2)</tt>
/// @n <tt>input_values = (a, b, c, d)</tt>
/// @n <tt> (e, f, g, h)</tt>
/// @n <tt>indices_sizes = (1, 1, 6, 1)</tt>
/// @n <tt>indices_values = {0, 0, 1, 1, 3, 3}</tt>
/// @n For axis: along_x:
/// @n <tt>output_sizes = (1, 2, 6, 2)</tt>
/// @n <tt>output_values = (a, a, b, b, d, d)</tt>
/// @n <tt> (e, e, f, f, h, h)</tt>
/// @n
/// @n The resulting output will have sizes equal to input_size, with the size along the selected axis changed to the x size of @c indices.
/// @n
/// @n@b Requirements:
/// @n - @c input must be a valid primitive_id whose output format is bfyx/yxfb;
/// @n - @c indices must be a valid primitive_id whose output layout is (bfyx/yxfb, i32, {1, 1, indices_size, 1});
/// @n - @c axis must be a valid index_select_axis_name instance.
/// @n Breaking any of these conditions will cause an exception to be thrown.
struct index_select : public primitive_base<index_select> {
CLDNN_DECLARE_PRIMITIVE(index_select)
/// @brief Constructs index_select primitive / layer.
///
/// @param id An identifier of new primitive.
/// @param input An identifier of primitive, which is an input for newly created
/// index_select primitive.
/// @param indices An identifier of a primitive which holds indices in memory distributed along x.
/// @param axis Axis of index selecting.
/// @param output_padding Optional padding for output from primitive.
index_select(
const primitive_id& id,
const primitive_id& input,
const primitive_id& indices,
index_select_axis_name axis = index_select_axis_name::along_b,
const padding& output_padding = padding())
: primitive_base(id, {input, indices}, output_padding), axis({axis}), reverse(false) {}
/// @brief Constructs index_select primitive / layer.
///
/// @param id An identifier of new primitive.
/// @param input An identifier of primitive, which is an input for newly created
/// index_select primitive.
/// @param axis Axis of index selecting.
/// @param output_padding Optional padding for output from primitive.
index_select(
const primitive_id& id,
const primitive_id& input,
index_select_axis_name axis = index_select_axis_name::along_b,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding), axis({axis}), reverse(true) {}
/// @brief Constructs index_select primitive / layer.
///
/// @param id An identifier of new primitive.
/// @param input An identifier of primitive, which is an input for newly created
/// index_select primitive.
/// @param axis Vector of axes of index selecting.
/// @param output_padding Optional padding for output from primitive.
index_select(
const primitive_id& id,
const primitive_id& input,
const std::vector<index_select_axis_name>& axis = {index_select_axis_name::along_b},
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding), axis(axis), reverse(true) {}
/// @brief A list of axes of index selecting
std::vector<index_select_axis_name> axis;
/// @brief Do index_select in reverse order on axis/axes.
bool reverse;
};
/// @}
/// @}
/// @}
} // namespace cldnn
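A similar sketch for the removed index_select primitive (same topology assumptions; all ids hypothetical):

// Copy the x-entries of "conv1" selected by the i32 indices held in "gather_idx".
cldnn::index_select sel(
    "select_x",                               // this primitive id
    "conv1",                                  // input primitive id
    "gather_idx",                             // indices primitive id
    cldnn::index_select_axis_name::along_x);  // axis to select along
topology.add(sel);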

View File

@ -1,58 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Returns the values from data that the given indices point at.
struct lookup_table : public primitive_base<lookup_table> {
CLDNN_DECLARE_PRIMITIVE(lookup_table)
/// @brief Enum type to specify the axis to return values along.
enum axis_name { batch, feature, x, y, xyf };
/// @brief Constructs lookup_table primitive.
/// @param id This primitive id.
/// @param input_data Input data primitive id.
/// @param input_indices Input indices primitive id.
/// @param axis Axis to return values from.
lookup_table(const primitive_id& id,
const primitive_id& input_data,
const primitive_id& input_indices,
axis_name axis = axis_name::xyf,
const padding& output_padding = padding())
: primitive_base(id, {input_data, input_indices}, output_padding),
axis(axis),
with_axis(axis == axis_name::xyf ? false : true) {}
/// @brief Axis to return values from. If not set, returns the data that each index points at within the flattened x, y, f dimensions for each batch.
axis_name axis;
/// @brief Indicates that the primitive has user defined axis to return values from.
bool with_axis;
};
/// @}
/// @}
/// @}
} // namespace cldnn
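A sketch for the removed lookup_table primitive (same topology assumptions; all ids hypothetical):

// For each batch, return the values pointed at by the indices in "top_idx" along the feature axis.
cldnn::lookup_table lut(
    "lookup_f",                                // this primitive id
    "pool1",                                   // data primitive id
    "top_idx",                                 // indices primitive id
    cldnn::lookup_table::axis_name::feature);  // axis to return values from
topology.add(lut);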

View File

@ -113,12 +113,6 @@ struct network {
/// @brief Provides user-supplied @ref memory for output primitives defined by user in source @ref topology. /// @brief Provides user-supplied @ref memory for output primitives defined by user in source @ref topology.
void set_output_memory(const primitive_id& id, const memory& mem) const; void set_output_memory(const primitive_id& id, const memory& mem) const;
/// @brief Sets learning rate for training primitives.
void set_learning_rate(const float lr);
/// @brief Return learning rate.
float get_learning_rate();
/// @brief Return stream id. /// @brief Return stream id.
uint16_t get_stream_id(); uint16_t get_stream_id();

View File

@ -1,51 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Performs the backward pass of the scale primitive with respect to its input.
struct scale_grad_input : public primitive_base<scale_grad_input> {
CLDNN_DECLARE_PRIMITIVE(scale_grad_input)
/// @brief Constructs scale_grad_input.
/// @param id This primitive id.
/// @param input Input primitive id.
/// @param scale_input Scale input primitive id with values needed for product computation.
scale_grad_input(const primitive_id& id,
const primitive_id& input,
const primitive_id& scale_input, // should be bfyx or yxfb, where each dimension can be 1, if all
// dimensions are 1 then this is scalar
const padding& output_padding = padding())
: primitive_base(id, {input, scale_input}, output_padding) {}
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override { return {}; }
};
/// @}
/// @}
/// @}
} // namespace cldnn
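A sketch for the removed scale_grad_input primitive (same topology assumptions; all ids hypothetical):

// Propagate the incoming gradient of a scale layer back to its input.
cldnn::scale_grad_input sgi(
    "scale1_grad_input",  // this primitive id
    "scale1_out_grad",    // gradient arriving from the next layer
    "scale1_scale");      // forward scale values, bfyx/yxfb
topology.add(sgi);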

View File

@ -1,131 +0,0 @@
/*
// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Performs the backward pass of the scale layer for scale_input and biases.
struct scale_grad_weights : public primitive_base<scale_grad_weights> {
CLDNN_DECLARE_PRIMITIVE(scale_grad_weights)
/// @brief Constructs scale_grad_weights primitive without bias.
/// @param id This primitive id.
/// @param input Input primitive id. Same as input for scale forward.
/// @param input_grad Input gradient primitive id.
/// @param scale_input Scale input primitive id.
/// @param scale_grad Id of primitive which uses weights and biases updated in this primitive. This is needed to enforce the correct calculation order.
scale_grad_weights(const primitive_id& id,
const primitive_id& input,
const primitive_id& input_grad,
const primitive_id& scale_input, // should be one number per feature
const primitive_id& scale_grad = "", // leave empty if this is last primitive in backward pass
const padding& output_padding = padding())
: primitive_base(id, {input, input_grad}, output_padding),
scale_input(scale_input),
bias(""),
prev_scale_grad(""),
prev_bias_grad(""),
scale_grad(scale_grad) {}
/// @brief Constructs scale_grad_weights primitive with optional adding bias.
/// @param id This primitive id.
/// @param input Input primitive id. Same as input for scale forward.
/// @param input_grad Input gradient primitive id.
/// @param scale_input Scale input primitive id.
/// @param bias Primitive id containing bias data.
/// @param scale_grad Id of primitive which uses weights and biases updated in this primitive. This is needed to enforce the correct calculation order.
scale_grad_weights(const primitive_id& id,
const primitive_id& input,
const primitive_id& input_grad,
const primitive_id& scale_input, // should be one number per feature
const primitive_id& bias, // should be same size as scale_input
const primitive_id& scale_grad = "", // leave empty if this is last primitive in backward pass
const padding& output_padding = padding())
: primitive_base(id, {input, input_grad}, output_padding),
scale_input(scale_input),
bias(bias),
prev_scale_grad(""),
prev_bias_grad(""),
scale_grad(scale_grad) {}
/// @brief Constructs scale_grad_weights primitive with optional bias and momentum optimizer.
/// @param id This primitive id.
/// @param input Input primitive id. Same as input for scale forward.
/// @param input_grad Input gradient primitive id.
/// @param scale_input Scale input primitive id.
/// @param bias Primitive id containing bias data.
/// @param prev_scale_grad Id of primitive which contains scale gradient data calculated in previous iteration. Used in momentum optimizer.
/// @param prev_bias_grad Id of primitive which contains bias gradient data calculated in previous iteration. Used in momentum optimizer.
/// @param scale_grad Id of primitive which uses weights and biases updated in this primitive. This is needed to enforce the correct calculation order.
scale_grad_weights(const primitive_id& id,
const primitive_id& input,
const primitive_id& input_grad,
const primitive_id& scale_input, // should be one number per feature
const primitive_id& bias, // should be same size as scale_input
const primitive_id& prev_scale_grad,
const primitive_id& prev_bias_grad, // leave empty if bias not specified
const primitive_id& scale_grad = "", // leave empty if this is last primitive in backward pass
const padding& output_padding = padding())
: primitive_base(id, {input, input_grad}, output_padding),
scale_input(scale_input),
bias(bias),
prev_scale_grad(prev_scale_grad),
prev_bias_grad(prev_bias_grad),
scale_grad(scale_grad) {}
/// @brief Scale input primitive id.
primitive_id scale_input;
/// @brief Primitive id containing bias data.
primitive_id bias;
/// @brief Primitive id containing scale gradient data calculated in previous iteration.
primitive_id prev_scale_grad;
/// @brief Primitive id containing bias gradient data calculated in previous iteration.
primitive_id prev_bias_grad;
/// @brief Primitive id which uses weights and biases updated in this primitive.
primitive_id scale_grad;
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
std::vector<std::reference_wrapper<const primitive_id>> ret;
ret.reserve(1 + !bias.empty() + !prev_scale_grad.empty() + !prev_bias_grad.empty());
ret.push_back(scale_input);
if (!bias.empty())
ret.push_back(bias);
if (!prev_scale_grad.empty())
ret.push_back(prev_scale_grad);
if (!prev_bias_grad.empty())
ret.push_back(prev_bias_grad);
if (!scale_grad.empty())
ret.push_back(scale_grad);
return ret;
}
};
/// @}
/// @}
/// @}
} // namespace cldnn
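A sketch for the removed scale_grad_weights primitive using its bias-less constructor (same topology assumptions; all ids hypothetical):

// Compute gradients for the per-feature scale values of a scale layer.
cldnn::scale_grad_weights sgw(
    "scale1_grad_weights",  // this primitive id
    "scale1_input",         // forward-pass input of the scale layer
    "scale1_out_grad",      // incoming gradient
    "scale1_scale");        // per-feature scale values to update
topology.add(sgw);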

View File

@ -1,47 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Backward pass for Softmax log loss.
/// @details The output values are the same as input_prob, except that the value at the position given by the label is decreased by 1.
struct softmax_loss_grad : public primitive_base<softmax_loss_grad> {
CLDNN_DECLARE_PRIMITIVE(softmax_loss_grad)
/// @brief Constructs softmax_loss_grad primitive.
/// @param id This primitive id.
/// @param input_prob Input primitive id.
/// @param labels Labels primitive id.
softmax_loss_grad(const primitive_id& id,
const primitive_id& input_prob,
const primitive_id& labels,
const padding& output_padding = padding())
: primitive_base(id, {input_prob, labels}, output_padding) {}
};
/// @}
/// @}
/// @}
} // namespace cldnn
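The removed primitive implements the usual softmax log-loss gradient, grad_i = p_i - [i == label]. A sketch under the same topology assumptions (all ids hypothetical):

cldnn::softmax_loss_grad sml(
    "loss_grad",      // this primitive id
    "softmax_prob",   // probabilities produced by a forward softmax
    "labels");        // ground-truth label indices
topology.add(sml);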

View File

@ -1,115 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "api/primitive.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Primitive that fuses convolution, batch norm, scale, and optionally ReLU.
struct fused_conv_bn_scale : public primitive_base<fused_conv_bn_scale> {
CLDNN_DECLARE_PRIMITIVE(fused_conv_bn_scale)
/// @brief Constructs convolution primitive fused with batch norm and scale.
/// @param id This primitive id.
/// @param input Input primitive id.
/// @param weights List of primitive ids containing weights data.
/// @param bias List of primitive ids containing bias data.
/// @param epsilon Small number to protect against division by zero.
/// @param scale_input Scale input primitive id with values needed for product computation. Used in fused scale part.
/// @param scale_bias Primitive id containing bias data for fused scale part.
/// @param input_offset Defines a shift, relative to (0,0) position of the input buffer,
/// where (0,0) point of the convolution window should start calculations.
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
/// @param inv_variance Primitive id containing inverted variance calculated in this primitive. Used in fused batch norm part.
/// @param with_activation Enable Relu activation.
/// @param activation_slp Relu activation slope.
fused_conv_bn_scale(const primitive_id& id,
const primitive_id& input,
const std::vector<primitive_id>& weights,
const std::vector<primitive_id>& bias,
float epsilon,
const primitive_id& scale_input,
const primitive_id& scale_bias = "",
tensor stride = {1, 1, 1, 1},
tensor dilation = {1, 1, 1, 1},
tensor input_offset = {0, 0, 0, 0},
const primitive_id& inv_variance = "",
const padding& output_padding = padding())
: primitive_base(id, {input, scale_input}, output_padding),
input_offset(input_offset),
stride(stride),
dilation(dilation),
with_output_size(false),
scale_bias(scale_bias),
inv_variance(inv_variance),
epsilon(epsilon),
weights(weights),
bias(bias) {
if ((bias.size() != 0) && (weights.size() != bias.size()))
throw std::runtime_error("convolution's weights/bias count does not match");
}
/// @brief Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution window should start calculations.
tensor input_offset;
/// @brief Defines shift in input buffer between adjacent calculations of output values.
tensor stride;
/// @brief Defines gaps in the input - dilation rate k=1 is normal convolution, k=2 means skipping one pixel per input, k=4 means skipping 3 pixels.
/// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1.
/// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4].
tensor dilation;
/// @brief Indicates that the primitive has user-defined output size (non-zero value).
bool with_output_size;
/// @brief User-defined output data size of the primitive (w/o padding).
tensor output_size;
/// @brief Primitive id containing scale bias data for fused convolution.
primitive_id scale_bias;
/// @brief Primitive id containing inverted variance used in future gradient computing for fused convolution.
primitive_id inv_variance;
/// @brief Epsilon for fused convolution.
float epsilon;
/// @brief Defines into how many parts the computation is split.
int32_t split() const { return static_cast<int32_t>(weights.size()); }
/// @brief List of primitive ids containing weights data.
const primitive_id_arr weights;
/// @brief List of primitive ids containing bias data.
const primitive_id_arr bias;
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
std::vector<std::reference_wrapper<const primitive_id>> ret;
ret.reserve(weights.size() + bias.size() + !scale_bias.empty() + !inv_variance.empty());
for (auto& w : weights) ret.push_back(std::ref(w));
for (auto& b : bias) ret.push_back(std::ref(b));
if (!scale_bias.empty())
ret.push_back(scale_bias);
if (!inv_variance.empty())
ret.push_back(inv_variance);
return ret;
}
};
/// @}
/// @}
/// @}
} // namespace cldnn
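A sketch for the removed fused_conv_bn_scale primitive using the constructor shown above; stride, dilation and input_offset keep their defaults (same topology assumptions; all ids hypothetical):

// Fuse a convolution with a batch-norm-derived scale/shift.
cldnn::fused_conv_bn_scale fused(
    "conv_bn_scale1",       // this primitive id
    "input_reorder",        // input primitive id
    { "conv1_weights" },    // weights
    { "conv1_bias" },       // bias
    1e-5f,                  // batch-norm epsilon
    "bn1_scale",            // fused scale values
    "bn1_shift");           // fused scale bias
topology.add(fused);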

View File

@ -37,9 +37,6 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise> {
/// @param input Input primitive id. /// @param input Input primitive id.
/// @param weights List of primitive ids containing weights data. /// @param weights List of primitive ids containing weights data.
/// @param bias List of primitive ids containing bias data. /// @param bias List of primitive ids containing bias data.
/// @param w_quantization_factor List of primitive ids containing weights quantization factors per output feature map.
/// @param output_calibration_factors List of primitive ids output containing calibration factors per output feature map.
/// @param i_quantization_factor Input quantization factor
/// @param input_offset Defines a shift, relative to (0,0) position of the input buffer, /// @param input_offset Defines a shift, relative to (0,0) position of the input buffer,
/// where (0,0) point of the convolution window should start calculations. /// where (0,0) point of the convolution window should start calculations.
/// @param stride Defines shift in input buffer between adjacent calculations of output values. /// @param stride Defines shift in input buffer between adjacent calculations of output values.
@ -57,11 +54,6 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise> {
eltwise_mode mode, eltwise_mode mode,
const std::vector<primitive_id>& weights, const std::vector<primitive_id>& weights,
const std::vector<primitive_id>& bias, const std::vector<primitive_id>& bias,
const std::vector<primitive_id>& conv_w_quantization_factor,
const std::vector<primitive_id>& conv_output_calibration_factors,
const float conv_i_quantization_factor,
const float non_conv_scale,
const primitive_id& eltw_output_calibration_factors,
const std::vector<tensor>& eltw_stride, const std::vector<tensor>& eltw_stride,
tensor stride = {1, 1, 1, 1}, tensor stride = {1, 1, 1, 1},
tensor input_offset = {0, 0, 0, 0}, tensor input_offset = {0, 0, 0, 0},
@ -74,18 +66,10 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise> {
optional_data_type output_data_type = {}) optional_data_type output_data_type = {})
: primitive_base(id, {input, input2}, output_padding, output_data_type), : primitive_base(id, {input, input2}, output_padding, output_data_type),
conv((primitive_id_arr)weights, conv((primitive_id_arr)weights,
(primitive_id_arr)bias, (primitive_id_arr)bias),
(primitive_id_arr)conv_w_quantization_factor, eltw(),
(primitive_id_arr)conv_output_calibration_factors),
eltw(eltw_output_calibration_factors),
non_conv_scale(non_conv_scale),
conv_weights(weights), conv_weights(weights),
conv_bias(bias), conv_bias(bias) {
conv_weights_quantization_factors(conv_w_quantization_factor),
conv_output_calibration_factors(conv_output_calibration_factors) {
conv.input_quantization_factor = conv_i_quantization_factor;
conv.output_quantization_factor = 1.0f;
conv.input_offset = input_offset; conv.input_offset = input_offset;
conv.stride = stride; conv.stride = stride;
conv.dilation = dilation; conv.dilation = dilation;
@ -100,10 +84,6 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise> {
if ((bias.size() != 0) && (weights.size() != bias.size())) if ((bias.size() != 0) && (weights.size() != bias.size()))
throw std::runtime_error("convolution's weights/bias count does not match"); throw std::runtime_error("convolution's weights/bias count does not match");
if (conv.output_calibration_factors.size()) {
if ((weights.size() != 0) && (weights.size() != conv.weights_quantization_factors.size()))
throw std::runtime_error("convolution's weights count does not match quantization factors count");
}
} }
struct conv_data { struct conv_data {
@ -111,14 +91,6 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise> {
const primitive_id_arr weights; const primitive_id_arr weights;
/// @brief List of primitive ids containing bias data. /// @brief List of primitive ids containing bias data.
const primitive_id_arr bias; const primitive_id_arr bias;
/// @brief List of primitive ids containing weights quantization factors per output feature map.
const primitive_id_arr weights_quantization_factors;
/// @brief List of primitive ids containing output quantization factors per output feature map for convolution.
const primitive_id_arr output_calibration_factors;
/// @brief Input quantization factor for convolution
float input_quantization_factor;
/// @brief Output quantization factor for convolution
float output_quantization_factor;
/// @brief Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution window should start calculations. /// @brief Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution window should start calculations.
tensor input_offset; tensor input_offset;
/// @brief Defines shift in input buffer between adjacent calculations of output values. /// @brief Defines shift in input buffer between adjacent calculations of output values.
@ -137,20 +109,12 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise> {
tensor output_size; tensor output_size;
conv_data(const primitive_id_arr& weights, conv_data(const primitive_id_arr& weights,
const primitive_id_arr& bias, const primitive_id_arr& bias)
const primitive_id_arr& weights_quantization_factors,
const primitive_id_arr& output_calibration_factors)
: weights(weights), : weights(weights),
bias(bias), bias(bias) {}
weights_quantization_factors(weights_quantization_factors),
output_calibration_factors(output_calibration_factors) {}
} conv; } conv;
struct eltw_data { struct eltw_data {
/// @brief Primitive id containing output quantization factors per output feature map.
primitive_id output_calibration_factors;
/// @brief Output quantization factor for eltwise
float output_quantization_factor;
/// @param mode Eltwise mode. /// @param mode Eltwise mode.
eltwise_mode mode; eltwise_mode mode;
/// @brief Enable Relu activation. /// @brief Enable Relu activation.
@ -159,22 +123,11 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise> {
float activation_negative_slope; float activation_negative_slope;
/// @brief Defines shift in input buffers between adjacent calculations of output values. /// @brief Defines shift in input buffers between adjacent calculations of output values.
std::vector<tensor> stride; std::vector<tensor> stride;
explicit eltw_data(const primitive_id& output_calibration_factors)
: output_calibration_factors(output_calibration_factors) {}
} eltw; } eltw;
/// @brief On how many cards split the computation to. /// @brief On how many cards split the computation to.
int32_t split() const { return static_cast<int32_t>(conv.weights.size()); } int32_t split() const { return static_cast<int32_t>(conv.weights.size()); }
// FIXME: In fact, that should be needed for any EltWise primitive, not
// only the fused one. What's more important, these scales should be
// separate for different inputs and probably per-channel, not per
// primitive.
//
// I'm only needing a scalar for my particular task, so let's hack like
// this in the meantime. The final design is still to be investigated.
float non_conv_scale = 1.0f;
/// @brief Is optimization that output contains data from second input ON ? /// @brief Is optimization that output contains data from second input ON ?
bool second_input_in_output = false; bool second_input_in_output = false;
bool depth_to_space_already_fused = false; bool depth_to_space_already_fused = false;
@ -182,21 +135,13 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise> {
protected: protected:
const primitive_id_arr conv_weights; const primitive_id_arr conv_weights;
const primitive_id_arr conv_bias; const primitive_id_arr conv_bias;
const primitive_id_arr conv_weights_quantization_factors;
const primitive_id_arr conv_output_calibration_factors;
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override { std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
std::vector<std::reference_wrapper<const primitive_id>> ret; std::vector<std::reference_wrapper<const primitive_id>> ret;
ret.reserve(conv.weights.size() + conv.bias.size() + conv.weights_quantization_factors.size() + ret.reserve(conv.weights.size() + conv.bias.size());
conv.output_calibration_factors.size() + (eltw.output_calibration_factors.empty() ? 0 : 1));
for (auto& w : conv.weights) ret.push_back(std::ref(w)); for (auto& w : conv.weights) ret.push_back(std::ref(w));
for (auto& b : conv.bias) ret.push_back(std::ref(b)); for (auto& b : conv.bias) ret.push_back(std::ref(b));
for (auto& q : conv.weights_quantization_factors) ret.push_back(std::ref(q));
for (auto& q : conv.output_calibration_factors) ret.push_back(std::ref(q));
if (!eltw.output_calibration_factors.empty())
ret.push_back(eltw.output_calibration_factors);
return ret; return ret;
} }

View File

@ -67,7 +67,6 @@ inline uint8_t GetActivationAdditionalParamsNumber(ActivationFunction func) {
break; break;
case ActivationFunction::RELU_NEGATIVE_SLOPE: case ActivationFunction::RELU_NEGATIVE_SLOPE:
case ActivationFunction::ELU: case ActivationFunction::ELU:
case ActivationFunction::RELU_NEGATIVE_SLOPE_GRAD:
paramsNum = 1; paramsNum = 1;
break; break;
default: default:

View File

@ -25,8 +25,6 @@ enum class KernelType {
UNKNOWN, UNKNOWN,
ARG_MAX_MIN, ARG_MAX_MIN,
AVERAGE_UNPOOLING, AVERAGE_UNPOOLING,
BATCH_NORM_GRAD,
LOOKUP_TABLE,
CONVOLUTION, CONVOLUTION,
DECONVOLUTION, DECONVOLUTION,
LRN, LRN,
@ -38,9 +36,7 @@ enum class KernelType {
SOFT_MAX, SOFT_MAX,
ELTWISE, ELTWISE,
SCALE, SCALE,
FUSED_CONV_BN_SCALE,
FUSED_CONV_ELTWISE, FUSED_CONV_ELTWISE,
TABLE_LOOKUP,
REORDER, REORDER,
RESHAPE, RESHAPE,
PERMUTE, PERMUTE,
@ -49,21 +45,14 @@ enum class KernelType {
REGION_YOLO, REGION_YOLO,
REORG_YOLO, REORG_YOLO,
MAX_UNPOOLING, MAX_UNPOOLING,
CONVOLUTION_GRAD_WEIGHTS,
SCALE_GRAD_WEIGHTS,
MVN, MVN,
FULLY_CONNECTED_GRAD_INPUT,
FULLY_CONNECTED_GRAD_WEIGHTS,
LSTM_GEMM, LSTM_GEMM,
LSTM_ELT, LSTM_ELT,
EMBED,
SOFT_MAX_LOSS_GRAD,
BORDER, BORDER,
TILE, TILE,
SELECT, SELECT,
BROADCAST, BROADCAST,
GEMM, GEMM,
INDEX_SELECT,
PYRAMID_ROI_ALIGN, PYRAMID_ROI_ALIGN,
CONTRACT, CONTRACT,
ONE_HOT, ONE_HOT,
@ -133,8 +122,6 @@ enum class ActivationFunction {
SQRT, SQRT,
LINEAR, LINEAR,
ELU, ELU,
RELU_GRAD,
RELU_NEGATIVE_SLOPE_GRAD,
SIN, SIN,
ASIN, ASIN,
SINH, SINH,
@ -155,7 +142,6 @@ enum class ActivationFunction {
NEGATIVE, NEGATIVE,
NOT, NOT,
POW, POW,
NONE_GRAD,
ERF, ERF,
HARD_SIGMOID, HARD_SIGMOID,
RECIPROCAL, RECIPROCAL,

View File

@ -103,9 +103,6 @@ KernelsData ActivationKernelBase::GetCommonKernelsData(const Params& params, con
FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point,
DEFAULT, false, false, 1, GetFusedPrimitiveInputsCount(params)); DEFAULT, false, false, 1, GetFusedPrimitiveInputsCount(params));
if (newParams.gradient)
kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});
if (!newParams.inputActivationParams.empty()) { if (!newParams.inputActivationParams.empty()) {
kernel.arguments.push_back({ArgumentDescriptor::Types::SLOPE, 0}); kernel.arguments.push_back({ArgumentDescriptor::Types::SLOPE, 0});
} }

View File

@ -34,7 +34,6 @@ ParamsKey ActivationKernelOpt::GetSupportedKey() const {
k.EnableAllOutputLayout(); k.EnableAllOutputLayout();
k.EnableTensorOffset(); k.EnableTensorOffset();
k.EnableBatching(); k.EnableBatching();
k.EnableGradient();
return k; return k;
} }

View File

@ -38,7 +38,6 @@ ParamsKey ActivationKernelRef::GetSupportedKey() const {
k.EnableTensorOffset(); k.EnableTensorOffset();
k.EnableTensorPitches(); k.EnableTensorPitches();
k.EnableBatching(); k.EnableBatching();
k.EnableGradient();
return k; return k;
} }

View File

@ -1,88 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "batch_norm_kernel_base.h"
#include <algorithm>
namespace kernel_selector {
bool BatchNormKernelBase::Validate(const Params& p, const optional_params& o) const {
if (p.GetType() != KernelType::BATCH_NORM_GRAD || o.GetType() != KernelType::BATCH_NORM_GRAD) {
return false;
}
return true;
}
JitConstants BatchNormKernelBase::GetJitConstants(const batch_norm_params& params) const {
JitConstants jit = MakeBaseParamsJitConstants(params);
jit.AddConstant(MakeJitConstant("EPSILON", params.batchNormParams.epsilon));
if (params.batchNormParams.with_inv_var)
jit.AddConstant(MakeJitConstant("FORWARD", 1));
if (params.batchNormParams.with_scale_shift)
jit.AddConstant(MakeJitConstant("SCALE_SHIFT", 1));
if (params.batchNormParams.with_mean_var_out)
jit.AddConstant(MakeJitConstant("MEAN_VAR_OUT", 1));
return jit;
}
BatchNormKernelBase::DispatchData BatchNormKernelBase::SetDefault(const batch_norm_params& params) const {
DispatchData kd;
kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
kd.gws0 = params.inputs[0].Batch().v;
kd.gws1 = params.inputs[0].Feature().v;
kd.gws2 = 1;
kd.lws0 = std::min(std::max(kd.gws0, static_cast<size_t>(1)), static_cast<size_t>(256));
while (kd.gws0 % kd.lws0 != 0) {
--kd.lws0;
}
kd.lws1 = 1;
kd.lws2 = 1;
return kd;
}
KernelsData BatchNormKernelBase::GetCommonKernelsData(const Params& params,
const optional_params& options,
float estimatedTime) const {
if (!Validate(params, options)) {
return {};
}
const batch_norm_params& orgParams = static_cast<const batch_norm_params&>(params);
DispatchData runInfo = SetDefault(orgParams);
KernelData kd = KernelData::Default<batch_norm_params>(params);
auto cldnn_jit = GetJitConstants(orgParams);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
int inputs_num = 1 + orgParams.batchNormParams.with_inv_var + 2 * orgParams.batchNormParams.with_scale_shift +
2 * orgParams.batchNormParams.with_mean_var_out;
FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, "", false, false, inputs_num);
kd.estimatedTime = estimatedTime;
return {kd};
}
} // namespace kernel_selector
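SetDefault above picks lws0 as the largest divisor of the batch dimension that does not exceed 256. A standalone sketch of that rule (illustrative only; the helper name is not part of the library):

#include <algorithm>
#include <cstddef>

// Largest divisor of gws0 not exceeding 256, mirroring SetDefault above.
static std::size_t pick_lws0(std::size_t gws0) {
    std::size_t lws0 = std::min(std::max(gws0, static_cast<std::size_t>(1)), static_cast<std::size_t>(256));
    while (gws0 % lws0 != 0) {
        --lws0;
    }
    return lws0;
}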

View File

@ -1,66 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "common_kernel_base.h"
#include "kernel_selector_params.h"
namespace kernel_selector {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// batch_norm_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct batch_norm_params : public base_params {
batch_norm_params() : base_params(KernelType::BATCH_NORM_GRAD) {}
struct DedicatedParams {
float epsilon;
bool with_inv_var;
bool with_scale_shift;
bool with_mean_var_out = false;
};
DedicatedParams batchNormParams;
virtual ParamsKey GetParamsKey() const {
return base_params::GetParamsKey();
}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// batch_norm_optional_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct batch_norm_optional_params : optional_params {
batch_norm_optional_params() : optional_params(KernelType::BATCH_NORM_GRAD) {}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// BatchNormKernelBase
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class BatchNormKernelBase : public common_kernel_base {
public:
using common_kernel_base::common_kernel_base;
virtual ~BatchNormKernelBase() {}
using DispatchData = CommonDispatchData;
protected:
bool Validate(const Params& params, const optional_params& options) const override;
KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimatedTime) const;
virtual JitConstants GetJitConstants(const batch_norm_params& params) const;
virtual DispatchData SetDefault(const batch_norm_params& params) const;
};
} // namespace kernel_selector

View File

@ -1,41 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "batch_norm_kernel_ref.h"
namespace kernel_selector {
ParamsKey BatchNormKernelRef::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableInputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::INT8);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableInputLayout(DataLayout::byxf);
k.EnableInputLayout(DataLayout::yxfb);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::byxf);
k.EnableOutputLayout(DataLayout::yxfb);
k.EnableBatching();
return k;
}
KernelsData BatchNormKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
return GetCommonKernelsData(params, options, FORCE_PRIORITY_9);
}
} // namespace kernel_selector

View File

@ -1,30 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "batch_norm_kernel_base.h"
namespace kernel_selector {
class BatchNormKernelRef : public BatchNormKernelBase {
public:
BatchNormKernelRef() : BatchNormKernelBase("batch_norm_gpu_ref") {}
virtual ~BatchNormKernelRef() {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
};
} // namespace kernel_selector

View File

@ -1,29 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "batch_norm_kernel_selector.h"
#include "batch_norm_kernel_ref.h"
namespace kernel_selector {
batch_norm_kernel_selector::batch_norm_kernel_selector() {
Attach<BatchNormKernelRef>();
}
KernelsData batch_norm_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
return GetNaiveBestKernel(params, options, KernelType::BATCH_NORM_GRAD);
}
} // namespace kernel_selector

View File

@ -1,35 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "kernel_selector.h"
namespace kernel_selector {
class batch_norm_kernel_selector : public kernel_selector_base {
public:
static batch_norm_kernel_selector& Instance() {
static batch_norm_kernel_selector instance_;
return instance_;
}
batch_norm_kernel_selector();
virtual ~batch_norm_kernel_selector() {}
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};
} // namespace kernel_selector
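The selector follows the singleton pattern used throughout kernel_selector; a hedged usage sketch, assuming bn_params and bn_options (hypothetical names) are already populated batch_norm_params / batch_norm_optional_params instances:

// Query the best available batch-norm kernels for the prepared parameters.
auto& selector = kernel_selector::batch_norm_kernel_selector::Instance();
kernel_selector::KernelsData kernels = selector.GetBestKernels(bn_params, bn_options);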

View File

@ -1,72 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "batch_norm_grad_kernel_base.h"
namespace kernel_selector {
bool BatchNormGradKernelBase::Validate(const Params& p, const optional_params& o) const {
if (p.GetType() != KernelType::BATCH_NORM_GRAD ||
o.GetType() != KernelType::BATCH_NORM_GRAD) {
return false;
}
return true;
}
JitConstants BatchNormGradKernelBase::GetJitConstants(const batch_norm_grad_params& params) const {
JitConstants jit = MakeBaseParamsJitConstants(params);
return jit;
}
BatchNormGradKernelBase::DispatchData BatchNormGradKernelBase::SetDefault(const batch_norm_grad_params& params) const {
DispatchData kd;
kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
kd.gws0 = params.inputs[0].Batch().v;
kd.gws1 = params.inputs[0].Feature().v;
kd.gws2 = 1;
kd.lws0 = params.inputs[0].Batch().v;
kd.lws1 = 1;
kd.lws2 = 1;
return kd;
}
KernelsData BatchNormGradKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, float estimatedTime) const {
if (!Validate(params, options)) {
return {};
}
const batch_norm_grad_params& orgParams = static_cast<const batch_norm_grad_params&>(params);
DispatchData runInfo = SetDefault(orgParams);
KernelData kd = KernelData::Default<batch_norm_grad_params>(params);
auto cldnn_jit = GetJitConstants(orgParams);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, "", false, false, 3);
kd.estimatedTime = estimatedTime;
return {kd};
}
} // namespace kernel_selector

View File

@ -1,57 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "common_kernel_base.h"
#include "kernel_selector_params.h"
namespace kernel_selector {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// batch_norm_grad_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct batch_norm_grad_params : public base_params {
batch_norm_grad_params() : base_params(KernelType::BATCH_NORM_GRAD) {}
virtual ParamsKey GetParamsKey() const {
return base_params::GetParamsKey();
}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// batch_norm_grad_optional_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct batch_norm_grad_optional_params : optional_params {
batch_norm_grad_optional_params() : optional_params(KernelType::BATCH_NORM_GRAD) {}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// BatchNormGradKernelBase
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class BatchNormGradKernelBase : public common_kernel_base {
public:
using common_kernel_base::common_kernel_base;
virtual ~BatchNormGradKernelBase() {}
using DispatchData = CommonDispatchData;
protected:
bool Validate(const Params& params, const optional_params& options) const override;
KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimatedTime) const;
virtual JitConstants GetJitConstants(const batch_norm_grad_params& params) const;
virtual DispatchData SetDefault(const batch_norm_grad_params& params) const;
};
} // namespace kernel_selector

View File

@ -1,41 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "batch_norm_grad_kernel_ref.h"
namespace kernel_selector {
ParamsKey BatchNormGradKernelRef::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableInputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::INT8);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableInputLayout(DataLayout::byxf);
k.EnableInputLayout(DataLayout::yxfb);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::byxf);
k.EnableOutputLayout(DataLayout::yxfb);
k.EnableBatching();
return k;
}
KernelsData BatchNormGradKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
return GetCommonKernelsData(params, options, FORCE_PRIORITY_9);
}
} // namespace kernel_selector

View File

@ -1,30 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "batch_norm_grad_kernel_base.h"
namespace kernel_selector {
class BatchNormGradKernelRef : public BatchNormGradKernelBase {
public:
BatchNormGradKernelRef() : BatchNormGradKernelBase("batch_norm_grad_gpu_ref") {}
virtual ~BatchNormGradKernelRef() {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
};
} // namespace kernel_selector

View File

@ -1,29 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "batch_norm_grad_kernel_selector.h"
#include "batch_norm_grad_kernel_ref.h"
namespace kernel_selector {
batch_norm_grad_kernel_selector::batch_norm_grad_kernel_selector() {
Attach<BatchNormGradKernelRef>();
}
KernelsData batch_norm_grad_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
return GetNaiveBestKernel(params, options, KernelType::BATCH_NORM_GRAD);
}
} // namespace kernel_selector

View File

@ -1,35 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "kernel_selector.h"
namespace kernel_selector {
class batch_norm_grad_kernel_selector : public kernel_selector_base {
public:
static batch_norm_grad_kernel_selector& Instance() {
static batch_norm_grad_kernel_selector instance_;
return instance_;
}
batch_norm_grad_kernel_selector();
virtual ~batch_norm_grad_kernel_selector() {}
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};
} // namespace kernel_selector

View File

@ -1,111 +0,0 @@
// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "contract_kernel_base.h"
#include <vector>
#include "kernel_selector_utils.h"
namespace kernel_selector {
JitConstants ContractKernelBase::GetJitConstants(const contract_params& params) const {
JitConstants jit = MakeBaseParamsJitConstants(params);
const size_t no_dim_flag = 6;
std::vector<size_t> output_dims(4, no_dim_flag);
int out_dim = 2;
for (int i = 3; i >= 0; --i) {
if (std::find(params.reduction_axes.begin(), params.reduction_axes.end(), i) == params.reduction_axes.end())
output_dims.at(i) = out_dim--;
}
if (output_dims[3] != no_dim_flag)
jit.AddConstants({MakeJitConstant("DIM_X", output_dims.at(3))});
if (output_dims[2] != no_dim_flag)
jit.AddConstants({MakeJitConstant("DIM_Y", output_dims.at(2))});
if (output_dims[1] != no_dim_flag)
jit.AddConstants({MakeJitConstant("DIM_F", output_dims.at(1))});
if (output_dims[0] != no_dim_flag)
jit.AddConstants({MakeJitConstant("DIM_B", output_dims.at(0))});
jit.AddConstants({MakeJitConstant("REDUCE_X", output_dims.at(3) == no_dim_flag),
MakeJitConstant("REDUCE_Y", output_dims.at(2) == no_dim_flag),
MakeJitConstant("REDUCE_F", output_dims.at(1) == no_dim_flag),
MakeJitConstant("REDUCE_B", output_dims.at(0) == no_dim_flag)});
switch (params.mode) {
case ContractMode::SUM:
jit.AddConstants({MakeJitConstant("REDUCE_SEED", "0"), MakeJitConstant("REDUCE_OPERATION(a, b)", "a + b")});
break;
case ContractMode::PRODUCT:
jit.AddConstants({MakeJitConstant("REDUCE_SEED", "1"), MakeJitConstant("REDUCE_OPERATION(a, b)", "a * b")});
break;
case ContractMode::ALL:
jit.AddConstants(
{MakeJitConstant("REDUCE_SEED", "1"), MakeJitConstant("REDUCE_OPERATION(a, b)", "a && b")});
break;
case ContractMode::ANY:
jit.AddConstants(
{MakeJitConstant("REDUCE_SEED", "0"), MakeJitConstant("REDUCE_OPERATION(a, b)", "a || b")});
break;
case ContractMode::MAX:
jit.AddConstants({MakeJitConstant("REDUCE_SEED", "UNIT_VAL_MIN"),
MakeJitConstant("REDUCE_OPERATION(a, b)", "UNIT_MAX_FUNC(a,b)")});
break;
}
return jit;
}
ContractKernelBase::DispatchData ContractKernelBase::SetDefault(const contract_params& params) {
const auto& output = params.output;
DispatchData kd;
kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
std::vector<size_t> global{output.Feature().v, output.Y().v, output.X().v};
const auto& local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
kd.gws0 = global[0];
kd.gws1 = global[1];
kd.gws2 = global[2];
kd.lws0 = local[0];
kd.lws1 = local[1];
kd.lws2 = local[2];
return kd;
}
KernelsData ContractKernelBase::GetCommonKernelsData(const Params& params,
const optional_params& options,
float estimated_time) const {
assert(params.GetType() == KernelType::CONTRACT);
const auto& prim_params =
static_cast<const contract_params&>(params);
auto run_info = SetDefault(prim_params);
KernelData k_data = KernelData::Default<contract_params>(params);
auto cldnn_jit = GetJitConstants(prim_params);
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = k_data.kernels[0];
FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point);
k_data.estimatedTime = estimated_time;
return {k_data};
}
} // namespace kernel_selector
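The loop in GetJitConstants above packs the non-reduced axes into the lowest output dimensions and flags the reduced ones with no_dim_flag. A standalone sketch of that mapping (illustrative only; the function name is not part of the library):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

// Axes listed in reduction_axes are reduced; the remaining ones are packed into dims 0..2.
std::vector<std::size_t> map_output_dims(const std::vector<std::uint16_t>& reduction_axes) {
    const std::size_t no_dim_flag = 6;                     // marks a reduced axis
    std::vector<std::size_t> output_dims(4, no_dim_flag);  // order: b, f, y, x
    int out_dim = 2;
    for (int i = 3; i >= 0; --i) {
        if (std::find(reduction_axes.begin(), reduction_axes.end(), i) == reduction_axes.end())
            output_dims.at(i) = out_dim--;
    }
    return output_dims;  // e.g. {6, 6, 1, 2} for reduction_axes = {0, 1}
}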

View File

@ -1,52 +0,0 @@
// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "common_kernel_base.h"
#include "kernel_selector_params.h"
#include <vector>
namespace kernel_selector {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// contract_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct contract_params : public base_params {
contract_params() : base_params(KernelType::CONTRACT), mode(ContractMode::ANY) {}
ContractMode mode;
std::vector<uint16_t> reduction_axes;
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// contract_optional_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct contract_optional_params : optional_params {
contract_optional_params() : optional_params(KernelType::CONTRACT) {}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// ContractKernelBase
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class ContractKernelBase : public common_kernel_base {
public:
using common_kernel_base::common_kernel_base;
using DispatchData = CommonDispatchData;
protected:
JitConstants GetJitConstants(const contract_params& params) const;
static DispatchData SetDefault(const contract_params& params);
KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const;
};
} // namespace kernel_selector

View File

@ -1,49 +0,0 @@
// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "contract_kernel_ref.h"
namespace kernel_selector {
ParamsKey ContractKernelRef::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableInputDataType(Datatype::INT8);
k.EnableInputDataType(Datatype::UINT8);
k.EnableInputDataType(Datatype::INT32);
k.EnableInputDataType(Datatype::INT64);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::INT32);
k.EnableOutputDataType(Datatype::INT64);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBatching();
return k;
}
KernelsData ContractKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
return GetCommonKernelsData(params, options, FORCE_PRIORITY_9);
}
} // namespace kernel_selector

View File

@ -1,27 +0,0 @@
// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "contract_kernel_base.h"
namespace kernel_selector {
class ContractKernelRef : public ContractKernelBase {
public:
ContractKernelRef() : ContractKernelBase("contract_ref") {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
};
} // namespace kernel_selector

View File

@ -1,24 +0,0 @@
// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "contract_kernel_selector.h"
#include "contract_kernel_ref.h"
namespace kernel_selector {
contract_kernel_selector::contract_kernel_selector() { Attach<ContractKernelRef>(); }
KernelsData contract_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
return GetNaiveBestKernel(params, options, KernelType::CONTRACT);
}
} // namespace kernel_selector

View File

@ -1,31 +0,0 @@
// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "kernel_selector.h"
namespace kernel_selector {
class contract_kernel_selector : public kernel_selector_base {
public:
static contract_kernel_selector& Instance() {
static contract_kernel_selector instance;
return instance;
}
contract_kernel_selector();
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};
} // namespace kernel_selector

View File

@ -1,87 +0,0 @@
/*
// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "convolution_kernel_byx8_f4__fs_bs_yx_bsv4_fsv32.h"
#include "kernel_selector_utils.h"
namespace kernel_selector {
ParamsKey ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputLayout(DataLayout::byx8_f4);
k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableDilation();
k.EnableBiasPerFeature();
k.EnableBatching();
k.EnableQuantization(QuantizationType::SYMMETRIC);
k.DisableTuning();
return k;
}
bool ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32::Validate(const Params& p, const optional_params& o) const {
if (!Parent::Validate(p, o)) {
return false;
}
return true;
}
size_t static get_wg_batch_size(const convolution_params& params) {
if (params.inputs[0].Batch().v % 64 == 0)
return 32;
return 1;
}
ConvolutionKernelBase::DispatchData ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32::SetDefault(
const convolution_params& arg,
int) const {
DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
runInfo.efficiency = FORCE_PRIORITY_1;
runInfo.gws0 = (arg.output.Batch().v * arg.output.Feature().v) / (4 * 2);
runInfo.gws1 = arg.output.X().v / 8;
runInfo.gws2 = arg.output.Y().v / 2;
runInfo.lws0 = 8 * get_wg_batch_size(arg);
runInfo.lws1 = 1;
runInfo.lws2 = 1;
return runInfo;
}
JitConstants ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32::GetJitConstants(const convolution_params& params,
const DispatchData& kd) const {
auto jits = ConvolutionKernelBase::GetJitConstants(params, kd);
jits.AddConstant(MakeJitConstant("WG_BATCH_SIZE", get_wg_batch_size(params)));
return jits;
}
KernelsData ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32::GetKernelsData(const Params& params,
const optional_params& options) const {
KernelsData kd = GetCommonKernelsData(params, options, " -Dcl_intel_subgroups_char");
if (!kd.empty())
kd[0].estimatedTime = FORCE_PRIORITY_3;
return kd;
}
} // namespace kernel_selector
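To make the dispatch arithmetic above concrete, take hypothetical output sizes B=64, F=32, X=56, Y=56 (chosen so every division is exact): get_wg_batch_size returns 32 because 64 % 64 == 0, so gws = {(64*32)/(4*2), 56/8, 56/2} = {256, 7, 28} and lws = {8*32, 1, 1} = {256, 1, 1}. The one constraint that matters is that gws0 stays divisible by lws0:

// Illustrative check only, for the hypothetical sizes above.
static_assert(((64 * 32) / (4 * 2)) % (8 * 32) == 0,
              "example sizes must keep gws0 divisible by lws0");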

View File

@ -1,41 +0,0 @@
// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "convolution_kernel_base.h"
#include <vector>
namespace kernel_selector {
class ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32 : public ConvolutionKernelBase {
public:
using Parent = ConvolutionKernelBase;
ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32()
: ConvolutionKernelBase("convolution_gpu_byx8_f4__fs_bs_yx_bsv4_fsv32") {}
virtual ~ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32() {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
protected:
bool Validate(const Params& p, const optional_params& o) const override;
JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
ConvolutionKernelBase::DispatchData SetDefault(const convolution_params& arg, int) const override;
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override {
return WeightsLayout::os_is_y_x8_osv8_isv4_swizzled_by_4;
}
};
} // namespace kernel_selector

View File

@ -1,61 +0,0 @@
/*
// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "convolution_kernel_byxf_fs_bs_yx_bsv4_fsv32.h"
#include "kernel_selector_utils.h"
namespace kernel_selector {
ParamsKey ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputLayout(DataLayout::byxf);
k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBiasPerFeature();
k.EnableNonBiasTerm();
k.EnableBatching();
k.EnableQuantization(QuantizationType::SYMMETRIC);
k.DisableTuning();
return k;
}
ConvolutionKernelBase::DispatchData ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32::SetDefault(
const convolution_params& arg,
int) const {
DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
runInfo.efficiency = FORCE_PRIORITY_1;
runInfo.gws0 = (arg.output.Batch().v * arg.output.Feature().v) / 4;
runInfo.gws1 = arg.output.X().v / 8;
runInfo.gws2 = arg.output.Y().v;
runInfo.lws0 = 8;
runInfo.lws1 = 1;
runInfo.lws2 = 1;
return runInfo;
}
KernelsData ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32::GetKernelsData(const Params& params,
const optional_params& options) const {
return GetCommonKernelsData(params, options);
}
} // namespace kernel_selector
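With hypothetical output sizes B=4, F=256, X=16, Y=16, the dispatch above becomes gws = {(4*256)/4, 16/8, 16} = {256, 2, 16} with lws = {8, 1, 1}; the implicit assumptions are that B*F divides by 4, X divides by 8, and gws0 divides by the sub-group width of 8:

// Illustrative check only, for the hypothetical sizes above.
static_assert(((4 * 256) / 4) % 8 == 0 && 16 % 8 == 0,
              "example sizes must divide evenly into the dispatch");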

View File

@ -1,37 +0,0 @@
// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "convolution_kernel_base.h"
#include <vector>
namespace kernel_selector {
class ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32 : public ConvolutionKernelBase {
public:
ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32() : ConvolutionKernelBase("convolution_gpu_byxf_fs_bs_yx_bsv4_fsv32") {}
virtual ~ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32() {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
protected:
ConvolutionKernelBase::DispatchData SetDefault(const convolution_params& arg, int) const override;
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override {
return WeightsLayout::yxio;
}
};
} // namespace kernel_selector

View File

@ -1,108 +0,0 @@
/*
// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "convolution_kernel_mmad_1x1_gemm.h"
namespace kernel_selector {
ParamsKey ConvolutionKernel_mmad_1x1_gemm::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputLayout(DataLayout::byxf_af32);
k.EnableOutputLayout(DataLayout::byxf_af32);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableDilation();
k.EnableBiasPerFeature();
k.EnableBiasPerOutput();
k.EnableNonBiasTerm();
k.EnableBatching();
k.EnableSplitSupport();
k.EnableDepthwiseSeparableOpt();
k.EnableQuantization(QuantizationType::SYMMETRIC);
k.DisableTuning();
return k;
}
bool ConvolutionKernel_mmad_1x1_gemm::Validate(const Params& p, const optional_params& o) const {
if (!ConvolutionKernelBase::Validate(p, o)) {
return false;
}
const auto& params = static_cast<const convolution_params&>(p);
if (params.filterSize.x != 1 || params.filterSize.y != 1)
return false;
if (params.stride.x != 1 || params.stride.y != 1)
return false;
if (params.padding.x != 0 || params.padding.y != 0)
return false;
const auto& input = params.inputs[0];
// we do not support padded input
if (input.X().pad.Total() != 0 || input.Y().pad.Total() != 0)
return false;
if (params.split != 1)
return false;
return true;
}
ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_1x1_gemm::SetDefault(const convolution_params& arg, int) const {
DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
// Sub-group size used by "convolution_1x1_gemm_MMAD" kernel.
constexpr size_t sub_group_size = 8;
const auto of_maps = arg.output.Feature().v;
const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size);
runInfo.efficiency = FORCE_PRIORITY_2;
runInfo.gws0 = RoundUp(arg.output.X().v * arg.output.Y().v, 8) / 8;
runInfo.gws1 = of_threads_per_batch * arg.output.Batch().v;
runInfo.gws2 = 1;
runInfo.lws0 = 1;
runInfo.lws1 = sub_group_size;
runInfo.lws2 = 1;
return runInfo;
}
JitConstants ConvolutionKernel_mmad_1x1_gemm::GetJitConstants(const convolution_params& params, const DispatchData& runInfo) const {
auto jit = Parent::GetJitConstants(params, runInfo);
jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws1));
// pitch for special block format used in this kernel
const size_t ifm_32_aligned = Align(params.weights.IFM().v, 32);
const size_t filter_ofm_block_pitch = (ifm_32_aligned / 32) * params.weights.X().v * params.weights.Y().v * 4 * 8 * 8;
jit.AddConstant(MakeJitConstant("FILTER_OFM_BLOCK_PITCH", filter_ofm_block_pitch));
return jit;
}
KernelsData ConvolutionKernel_mmad_1x1_gemm::GetKernelsData(const Params& params, const optional_params& options) const {
return GetTunedKernelsDataByIndex(params, options);
}
} // namespace kernel_selector
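For a hypothetical 1x1 convolution with IFM=64, OFM=96 and a 14x14 output at batch 1, the formulas above give FILTER_OFM_BLOCK_PITCH = (64/32)*1*1*4*8*8 = 512, of_threads_per_batch = RoundUp(96, 8) = 96, gws = {RoundUp(196, 8)/8, 96, 1} = {25, 96, 1} and lws = {1, 8, 1}:

// Illustrative checks only, for the hypothetical sizes above.
static_assert((64 / 32) * 1 * 1 * 4 * 8 * 8 == 512, "filter OFM block pitch");
static_assert((14 * 14 + 7) / 8 == 25, "gws0 = RoundUp(14 * 14, 8) / 8");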

View File

@ -1,40 +0,0 @@
// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "convolution_kernel_base.h"
#include <vector>
namespace kernel_selector {
class ConvolutionKernel_mmad_1x1_gemm : public ConvolutionKernelBase {
public:
using Parent = ConvolutionKernelBase;
ConvolutionKernel_mmad_1x1_gemm() : ConvolutionKernelBase("convolution_gpu_1x1_gemm_MMAD") {}
virtual ~ConvolutionKernel_mmad_1x1_gemm() {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
protected:
JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
bool Validate(const Params& p, const optional_params& o) const override;
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override {
return WeightsLayout::os_is_yx_isa8_osv8_isv4;
}
};
} // namespace kernel_selector

View File

@ -1,180 +0,0 @@
/*
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "convolution_kernel_mmad_32x32sg_128x128wg_slm_int8.h"
#include "kernel_selector_utils.h"
namespace kernel_selector {
static const size_t _SG_TILE_M = 32;
static const size_t _SG_TILE_N = 32;
static const size_t _SG_SIZE = 8; // sub group size
static const size_t _TILES_PER_SG_X = 1; // Persistent threads
static const size_t _TILES_PER_SG_Y = 1; // Persistent threads
ParamsKey ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBiasPerFeature();
k.EnableBatching();
k.EnableQuantization(QuantizationType::SYMMETRIC);
k.DisableTuning();
return k;
}
bool ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8::Validate(const Params& p, const optional_params& o) const {
if (!ConvolutionKernelBase::Validate(p, o) || !CovolutionCheckInput(p, o)) {
return false;
}
const convolution_params& cp = static_cast<const convolution_params&>(p);
// make sure it's 1x1 conv
if (cp.filterSize.x != 1 || cp.filterSize.y != 1)
return false;
// make sure stride is 1x1
if (cp.stride.x != 1 || cp.stride.y != 1)
return false;
// input padding not supported
if (cp.inputs[0].X().pad.Total() != 0 || cp.inputs[0].Y().pad.Total() != 0 ||
cp.inputs[0].Feature().pad.Total() != 0 || cp.inputs[0].Batch().pad.Total() != 0)
return false;
// input and output spatial sizes must match
if (!(cp.output.X().v == cp.inputs[0].X().v) || !(cp.output.Y().v == cp.inputs[0].Y().v))
return false;
const auto m = cp.output.X().v * cp.output.Y().v * cp.output.Batch().v;
const auto k = cp.inputs[0].Feature().v;
const auto n = cp.output.Feature().v;
if (m % 32 != 0 && m % 128 != 0) // Matrix size M, Must be multiple of 32 and multiple of WG_TILE_M=128
return false;
if (k % 32 != 0) // Matrix size K, Must be multiple of 32
return false;
if (n % 32 != 0 && n % 128 != 0) // Matrix size N, Must be multiple of 32 and multiple of WG_TILE_N=128
return false;
return true;
}
ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8::SetDefault(
const convolution_params& arg,
int) const {
DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
runInfo.efficiency = FORCE_PRIORITY_1;
size_t mat_m = arg.output.X().v * arg.output.Y().v * arg.output.Batch().v;
size_t mat_n = arg.output.Feature().v;
size_t _MATRIX_M = mat_m;
size_t _MATRIX_N = mat_n;
size_t _WG_TILE_M = 128;
size_t _WG_TILE_N = 128;
// Calculate number of threads needed
const size_t threadsX = (_MATRIX_N / (_SG_TILE_N / _SG_SIZE)) / _TILES_PER_SG_X;
const size_t threadsY = (_MATRIX_M / _SG_TILE_M) / _TILES_PER_SG_Y;
// Define execution setup for kernel:
size_t globalWorkSize[3] = {threadsX, threadsY, 1};
size_t localWorkSize[3] = {_SG_SIZE * _WG_TILE_N / _SG_TILE_N, _WG_TILE_M / _SG_TILE_M, 1};
runInfo.gws0 = globalWorkSize[0];
runInfo.gws1 = globalWorkSize[1];
runInfo.gws2 = globalWorkSize[2];
runInfo.lws0 = localWorkSize[0];
runInfo.lws1 = localWorkSize[1];
runInfo.lws2 = localWorkSize[2];
return runInfo;
}
JitConstants ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8::GetJitConstants(const convolution_params& params,
const DispatchData& runInfo) const {
auto jit = Parent::GetJitConstants(params, runInfo);
jit.AddConstant(MakeJitConstant("WG_TILE_M", 128)); // Work-Group tile size M, Must be mutliple of 32
jit.AddConstant(MakeJitConstant("WG_TILE_N", 128)); // Work-Group tile size N, Must be mutliple of 32
jit.AddConstant(MakeJitConstant("TILES_PER_SG_X", 1)); // Persistent threads
jit.AddConstant(MakeJitConstant("TILES_PER_SG_Y", 1)); // Persistent threads
// Do not change values below
jit.AddConstant(MakeJitConstant("DIM_X", 0));
jit.AddConstant(MakeJitConstant("DIM_Y", 1));
jit.AddConstant(MakeJitConstant("MATRIX_SMALL_K", 32));
jit.AddConstant(MakeJitConstant("MATRIX_SMALL_K_BFLOAT", 16));
jit.AddConstant(MakeJitConstant("SG_TILE_M", _SG_TILE_M));
jit.AddConstant(MakeJitConstant("SG_TILE_N", _SG_TILE_N));
jit.AddConstant(MakeJitConstant("SG_SIZE", _SG_SIZE));
jit.AddConstant(MakeJitConstant("SIMD_LANE_M", "SG_TILE_M"));
jit.AddConstant(MakeJitConstant("SIMD_LANE_N", "(SG_TILE_N / SG_SIZE)"));
jit.AddConstant(MakeJitConstant("WG_SIZE", "(SG_SIZE * WG_TILE_N / SG_TILE_N) * (WG_TILE_M / SG_TILE_M)"));
jit.AddConstant(MakeJitConstant("COMPILE_KERNELS", ""));
jit.AddConstant(MakeJitConstant("TILED_GLOBAL_LAYOUT", ""));
jit.AddConstant(MakeJitConstant("OUTPUT_TILED_GLOBAL_LAYOUT", ""));
const auto& input = params.inputs[0];
const auto& output = params.output;
auto m = output.X().v * output.Y().v * output.Batch().v;
auto k = input.Feature().v;
auto n = output.Feature().v;
jit.AddConstant(MakeJitConstant("MATRIX_M", m));
jit.AddConstant(MakeJitConstant("MATRIX_K", k));
jit.AddConstant(MakeJitConstant("MATRIX_N", n));
const size_t out_x_pitch = 32 * 4;
const size_t out_y_pitch = 32 * 4 * params.output.X().LogicalDimPadded();
const size_t out_b_block_pitch = out_y_pitch * params.output.Y().LogicalDimPadded();
const size_t out_f_block_pitch = out_b_block_pitch * ((params.output.Batch().v + 3) / 4);
const size_t out_offset = out_x_pitch * params.output.X().pad.before + out_y_pitch * params.output.Y().pad.before;
jit.AddConstant(MakeJitConstant("OUT_X_PITCH", out_x_pitch));
jit.AddConstant(MakeJitConstant("OUT_Y_PITCH", out_y_pitch));
jit.AddConstant(MakeJitConstant("OUT_B_BLOCK_PITCH", out_b_block_pitch));
jit.AddConstant(MakeJitConstant("OUT_F_BLOCK_PITCH", out_f_block_pitch));
jit.AddConstant(MakeJitConstant("OUT_OFFSET", out_offset));
bool out_padding = output.X().pad.Total() != 0 || output.Y().pad.Total() != 0;
jit.AddConstant(MakeJitConstant("OUT_WITH_PADDING", out_padding));
return jit;
}
KernelsData ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8::GetKernelsData(const Params& params,
const optional_params& options) const {
KernelsData kd = GetCommonKernelsData(params, options);
if (!kd.empty())
kd[0].estimatedTime = FORCE_PRIORITY_1; // _3
return kd;
}
} // namespace kernel_selector
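The GEMM-style dispatch above is easier to read with concrete numbers. Assuming a hypothetical problem where MATRIX_M = X*Y*B = 256 and MATRIX_N = OFM = 256 (both multiples of the 128x128 work-group tile), threadsX = (256/(32/8))/1 = 64, threadsY = 256/32 = 8, and the local size is {8*128/32, 128/32, 1} = {32, 4, 1}, i.e. 128 work items per work-group with each 8-wide sub-group producing one 32x32 output tile:

// Illustrative checks only, for the hypothetical MATRIX_M = MATRIX_N = 256 case.
static_assert(((256 / (32 / 8)) / 1) % (8 * 128 / 32) == 0, "gws0 divisible by lws0");
static_assert(((256 / 32) / 1) % (128 / 32) == 0, "gws1 divisible by lws1");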

View File

@ -1,42 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "convolution_kernel_base.h"
#include <vector>
namespace kernel_selector {
class ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8 : public ConvolutionKernelBase {
public:
using Parent = ConvolutionKernelBase;
ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8()
: ConvolutionKernelBase("convolution_gpu_mmad_32x32sg_128x128wg_slm_int8") {}
virtual ~ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8() {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
protected:
bool Validate(const Params& p, const optional_params& o) const override;
JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override {
return WeightsLayout::is_o32_yx_isv32_swizzled_by_4;
}
};
} // namespace kernel_selector

View File

@ -1,180 +0,0 @@
/*
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "convolution_kernel_mmad_32x32sg_224x128wg_slm_int8.h"
#include "kernel_selector_utils.h"
namespace kernel_selector {
static const size_t _SG_TILE_M = 32;
static const size_t _SG_TILE_N = 32;
static const size_t _SG_SIZE = 8; // sub group size
static const size_t _TILES_PER_SG_X = 1; // Persistent threads
static const size_t _TILES_PER_SG_Y = 1; // Persistent threads
ParamsKey ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBiasPerFeature();
k.EnableBatching();
k.EnableQuantization(QuantizationType::SYMMETRIC);
k.DisableTuning();
return k;
}
bool ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8::Validate(const Params& p, const optional_params& o) const {
if (!ConvolutionKernelBase::Validate(p, o) || !CovolutionCheckInput(p, o)) {
return false;
}
const convolution_params& cp = static_cast<const convolution_params&>(p);
// make sure it's 1x1 conv
if (cp.filterSize.x != 1 || cp.filterSize.y != 1)
return false;
// make sure stride is 1x1
if (cp.stride.x != 1 || cp.stride.y != 1)
return false;
// input padding not supported
if (cp.inputs[0].X().pad.Total() != 0 || cp.inputs[0].Y().pad.Total() != 0 ||
cp.inputs[0].Feature().pad.Total() != 0 || cp.inputs[0].Batch().pad.Total() != 0)
return false;
// input and output spatial sizes must match
if (!(cp.output.X().v == cp.inputs[0].X().v) || !(cp.output.Y().v == cp.inputs[0].Y().v))
return false;
const auto m = cp.output.X().v * cp.output.Y().v * cp.output.Batch().v;
const auto k = cp.inputs[0].Feature().v;
const auto n = cp.output.Feature().v;
if (m % 32 != 0 && m % 224 != 0) // Matrix size M, Must be multiple of 32 and multiple of WG_TILE_M=224
return false;
if (k % 32 != 0) // Matrix size K, Must be multiple of 32
return false;
if (n % 32 != 0 && n % 128 != 0) // Matrix size N, Must be multiple of 32 and multiple of WG_TILE_N=128
return false;
return true;
}
ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8::SetDefault(
const convolution_params& arg,
int) const {
DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
runInfo.efficiency = FORCE_PRIORITY_1;
size_t mat_m = arg.output.X().v * arg.output.Y().v * arg.output.Batch().v;
size_t mat_n = arg.output.Feature().v;
size_t _MATRIX_M = mat_m;
size_t _MATRIX_N = mat_n;
size_t _WG_TILE_M = 224;
size_t _WG_TILE_N = 128;
// Calculate number of threads needed
const size_t threadsX = (_MATRIX_N / (_SG_TILE_N / _SG_SIZE)) / _TILES_PER_SG_X;
const size_t threadsY = (_MATRIX_M / _SG_TILE_M) / _TILES_PER_SG_Y;
// Define execution setup for kernel:
size_t globalWorkSize[3] = {threadsX, threadsY, 1};
size_t localWorkSize[3] = {_SG_SIZE * _WG_TILE_N / _SG_TILE_N, _WG_TILE_M / _SG_TILE_M, 1};
runInfo.gws0 = globalWorkSize[0];
runInfo.gws1 = globalWorkSize[1];
runInfo.gws2 = globalWorkSize[2];
runInfo.lws0 = localWorkSize[0];
runInfo.lws1 = localWorkSize[1];
runInfo.lws2 = localWorkSize[2];
return runInfo;
}
JitConstants ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8::GetJitConstants(const convolution_params& params,
const DispatchData& runInfo) const {
auto jit = Parent::GetJitConstants(params, runInfo);
jit.AddConstant(MakeJitConstant("WG_TILE_M", 224)); // Work-Group tile size M, Must be mutliple of 32
jit.AddConstant(MakeJitConstant("WG_TILE_N", 128)); // Work-Group tile size N, Must be mutliple of 32
jit.AddConstant(MakeJitConstant("TILES_PER_SG_X", _TILES_PER_SG_X));
jit.AddConstant(MakeJitConstant("TILES_PER_SG_Y", _TILES_PER_SG_Y));
// Do not change values below
jit.AddConstant(MakeJitConstant("DIM_X", 0));
jit.AddConstant(MakeJitConstant("DIM_Y", 1));
jit.AddConstant(MakeJitConstant("MATRIX_SMALL_K", 32));
jit.AddConstant(MakeJitConstant("MATRIX_SMALL_K_BFLOAT", 16));
jit.AddConstant(MakeJitConstant("SG_TILE_M", _SG_TILE_M));
jit.AddConstant(MakeJitConstant("SG_TILE_N", _SG_TILE_N));
jit.AddConstant(MakeJitConstant("SG_SIZE", _SG_SIZE));
jit.AddConstant(MakeJitConstant("SIMD_LANE_M", "SG_TILE_M"));
jit.AddConstant(MakeJitConstant("SIMD_LANE_N", "(SG_TILE_N / SG_SIZE)"));
jit.AddConstant(MakeJitConstant("WG_SIZE", "(SG_SIZE * WG_TILE_N / SG_TILE_N) * (WG_TILE_M / SG_TILE_M)"));
jit.AddConstant(MakeJitConstant("COMPILE_KERNELS", ""));
jit.AddConstant(MakeJitConstant("TILED_GLOBAL_LAYOUT", ""));
jit.AddConstant(MakeJitConstant("OUTPUT_TILED_GLOBAL_LAYOUT", ""));
const auto& input = params.inputs[0];
const auto& output = params.output;
auto m = output.X().v * output.Y().v * output.Batch().v;
auto k = input.Feature().v;
auto n = output.Feature().v;
jit.AddConstant(MakeJitConstant("MATRIX_M", m)); // Matrix size M, Must be mutliple of 32 and multiple of WG_TILE_M
jit.AddConstant(MakeJitConstant("MATRIX_K", k)); // Matrix size K, Must be mutliple of 32
jit.AddConstant(MakeJitConstant("MATRIX_N", n)); // Matrix size N, Must be mutliple of 32 and multiple of WG_TILE_N
const size_t out_x_pitch = 32 * 4;
const size_t out_y_pitch = 32 * 4 * params.output.X().LogicalDimPadded();
const size_t out_b_block_pitch = out_y_pitch * params.output.Y().LogicalDimPadded();
const size_t out_f_block_pitch = out_b_block_pitch * ((params.output.Batch().v + 3) / 4);
const size_t out_offset = out_x_pitch * params.output.X().pad.before + out_y_pitch * params.output.Y().pad.before;
jit.AddConstant(MakeJitConstant("OUT_X_PITCH", out_x_pitch));
jit.AddConstant(MakeJitConstant("OUT_Y_PITCH", out_y_pitch));
jit.AddConstant(MakeJitConstant("OUT_B_BLOCK_PITCH", out_b_block_pitch));
jit.AddConstant(MakeJitConstant("OUT_F_BLOCK_PITCH", out_f_block_pitch));
jit.AddConstant(MakeJitConstant("OUT_OFFSET", out_offset));
bool out_padding = output.X().pad.Total() != 0 || output.Y().pad.Total() != 0;
jit.AddConstant(MakeJitConstant("OUT_WITH_PADDING", out_padding));
return jit;
}
KernelsData ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8::GetKernelsData(const Params& params,
const optional_params& options) const {
KernelsData kd = GetCommonKernelsData(params, options);
if (!kd.empty())
kd[0].estimatedTime = FORCE_PRIORITY_1; // _3
return kd;
}
} // namespace kernel_selector

View File

@ -1,42 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "convolution_kernel_base.h"
#include <vector>
namespace kernel_selector {
class ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8 : public ConvolutionKernelBase {
public:
using Parent = ConvolutionKernelBase;
ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8()
: ConvolutionKernelBase("convolution_gpu_mmad_32x32sg_224x128wg_slm_int8") {}
virtual ~ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8() {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
protected:
bool Validate(const Params& p, const optional_params& o) const override;
JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override {
return WeightsLayout::is_o32_yx_isv32_swizzled_by_4;
}
};
} // namespace kernel_selector

View File

@ -1,176 +0,0 @@
/*
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "convolution_kernel_mmad_32x32sg_slm_int8.h"
#include "kernel_selector_utils.h"
namespace kernel_selector {
static const size_t _SG_TILE_M = 32;
static const size_t _SG_TILE_N = 32;
static const size_t _SG_SIZE = 8; // sub group size
static const size_t _TILES_PER_SG_X = 1; // Persistent threads
static const size_t _TILES_PER_SG_Y = 1; // Persistent threads
ParamsKey ConvolutionKernel_mmad_32x32sg_slm_int8::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBiasPerFeature();
k.EnableBatching();
k.EnableQuantization(QuantizationType::SYMMETRIC);
k.DisableTuning();
return k;
}
bool ConvolutionKernel_mmad_32x32sg_slm_int8::Validate(const Params& p, const optional_params& o) const {
if (!ConvolutionKernelBase::Validate(p, o) || !CovolutionCheckInput(p, o)) {
return false;
}
const convolution_params& cp = static_cast<const convolution_params&>(p);
// make sure it's 1x1 conv
if (cp.filterSize.x != 1 || cp.filterSize.y != 1)
return false;
// make sure stride is 1x1
if (cp.stride.x != 1 || cp.stride.y != 1)
return false;
// input padding not supported
if (cp.inputs[0].X().pad.Total() != 0 || cp.inputs[0].Y().pad.Total() != 0 ||
cp.inputs[0].Feature().pad.Total() != 0 || cp.inputs[0].Batch().pad.Total() != 0)
return false;
// input and output spatial sizes must match
if (!(cp.output.X().v == cp.inputs[0].X().v) || !(cp.output.Y().v == cp.inputs[0].Y().v))
return false;
const auto m = cp.output.X().v * cp.output.Y().v * cp.output.Batch().v;
const auto k = cp.inputs[0].Feature().v;
const auto n = cp.output.Feature().v;
if (m % 32 != 0) // Matrix size M, Must be multiple of 32
return false;
if (k % 32 != 0) // Matrix size K, Must be multiple of 32
return false;
if (n % 32 != 0) // Matrix size N, Must be multiple of 32
return false;
return true;
}
ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_32x32sg_slm_int8::SetDefault(const convolution_params& arg,
int) const {
DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
runInfo.efficiency = FORCE_PRIORITY_2;
size_t mat_m = arg.output.X().v * arg.output.Y().v * arg.output.Batch().v;
size_t mat_n = arg.output.Feature().v;
size_t _MATRIX_M = mat_m;
size_t _MATRIX_N = mat_n;
size_t _WG_TILE_M = 32;
size_t _WG_TILE_N = 32;
// Calculate number of threads needed
const size_t threadsX = (_MATRIX_N / (_SG_TILE_N / _SG_SIZE)) / _TILES_PER_SG_X;
const size_t threadsY = (_MATRIX_M / _SG_TILE_M) / _TILES_PER_SG_Y;
// Define execution setup for kernel:
size_t globalWorkSize[3] = {threadsX, threadsY, 1};
size_t localWorkSize[3] = {_SG_SIZE * _WG_TILE_N / _SG_TILE_N, _WG_TILE_M / _SG_TILE_M, 1};
runInfo.gws0 = globalWorkSize[0];
runInfo.gws1 = globalWorkSize[1];
runInfo.gws2 = globalWorkSize[2];
runInfo.lws0 = localWorkSize[0];
runInfo.lws1 = localWorkSize[1];
runInfo.lws2 = localWorkSize[2];
return runInfo;
}
JitConstants ConvolutionKernel_mmad_32x32sg_slm_int8::GetJitConstants(const convolution_params& params,
const DispatchData& runInfo) const {
auto jit = Parent::GetJitConstants(params, runInfo);
jit.AddConstant(MakeJitConstant("WG_TILE_M", 32)); // Work-Group tile size M, Must be mutliple of 32
jit.AddConstant(MakeJitConstant("WG_TILE_N", 32)); // Work-Group tile size N, Must be mutliple of 32
jit.AddConstant(MakeJitConstant("TILES_PER_SG_X", _TILES_PER_SG_X));
jit.AddConstant(MakeJitConstant("TILES_PER_SG_Y", _TILES_PER_SG_Y));
// Do not change values below
jit.AddConstant(MakeJitConstant("DIM_X", 0));
jit.AddConstant(MakeJitConstant("DIM_Y", 1));
jit.AddConstant(MakeJitConstant("MATRIX_SMALL_K", 32));
jit.AddConstant(MakeJitConstant("MATRIX_SMALL_K_BFLOAT", 16));
jit.AddConstant(MakeJitConstant("SG_TILE_M", _SG_TILE_M));
jit.AddConstant(MakeJitConstant("SG_TILE_N", _SG_TILE_N));
jit.AddConstant(MakeJitConstant("SG_SIZE", _SG_SIZE));
jit.AddConstant(MakeJitConstant("SIMD_LANE_M", "SG_TILE_M"));
jit.AddConstant(MakeJitConstant("SIMD_LANE_N", "(SG_TILE_N / SG_SIZE)"));
jit.AddConstant(MakeJitConstant("WG_SIZE", "(SG_SIZE * WG_TILE_N / SG_TILE_N) * (WG_TILE_M / SG_TILE_M)"));
jit.AddConstant(MakeJitConstant("COMPILE_KERNELS", ""));
jit.AddConstant(MakeJitConstant("TILED_GLOBAL_LAYOUT", ""));
jit.AddConstant(MakeJitConstant("OUTPUT_TILED_GLOBAL_LAYOUT", ""));
const auto& input = params.inputs[0];
const auto& output = params.output;
auto m = output.X().v * output.Y().v * output.Batch().v;
auto k = input.Feature().v;
auto n = output.Feature().v;
jit.AddConstant(MakeJitConstant("MATRIX_M", m)); // Matrix size M, Must be mutliple of 32 and multiple of WG_TILE_M
jit.AddConstant(MakeJitConstant("MATRIX_K", k)); // Matrix size K, Must be mutliple of 32
jit.AddConstant(MakeJitConstant("MATRIX_N", n)); // Matrix size N, Must be mutliple of 32 and multiple of WG_TILE_N
const size_t out_x_pitch = 32 * 4;
const size_t out_y_pitch = 32 * 4 * params.output.X().LogicalDimPadded();
const size_t out_b_block_pitch = out_y_pitch * params.output.Y().LogicalDimPadded();
const size_t out_f_block_pitch = out_b_block_pitch * ((params.output.Batch().v + 3) / 4);
const size_t out_offset = out_x_pitch * params.output.X().pad.before + out_y_pitch * params.output.Y().pad.before;
jit.AddConstant(MakeJitConstant("OUT_X_PITCH", out_x_pitch));
jit.AddConstant(MakeJitConstant("OUT_Y_PITCH", out_y_pitch));
jit.AddConstant(MakeJitConstant("OUT_B_BLOCK_PITCH", out_b_block_pitch));
jit.AddConstant(MakeJitConstant("OUT_F_BLOCK_PITCH", out_f_block_pitch));
jit.AddConstant(MakeJitConstant("OUT_OFFSET", out_offset));
return jit;
}
KernelsData ConvolutionKernel_mmad_32x32sg_slm_int8::GetKernelsData(const Params& params,
const optional_params& options) const {
KernelsData kd = GetCommonKernelsData(params, options);
if (!kd.empty())
kd[0].estimatedTime = FORCE_PRIORITY_2; // _3
return kd;
}
} // namespace kernel_selector

View File

@ -1,41 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "convolution_kernel_base.h"
#include <vector>
namespace kernel_selector {
class ConvolutionKernel_mmad_32x32sg_slm_int8 : public ConvolutionKernelBase {
public:
using Parent = ConvolutionKernelBase;
ConvolutionKernel_mmad_32x32sg_slm_int8() : ConvolutionKernelBase("convolution_gpu_mmad_32x32sg_slm_int8") {}
virtual ~ConvolutionKernel_mmad_32x32sg_slm_int8() {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
protected:
bool Validate(const Params& p, const optional_params& o) const override;
JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override {
return WeightsLayout::is_o_yx_isv32;
}
};
} // namespace kernel_selector

View File

@ -35,19 +35,8 @@
#include "convolution_kernel_winograd_6x3_s1_fused.h" #include "convolution_kernel_winograd_6x3_s1_fused.h"
#include "convolution_kernel_mmad.h" #include "convolution_kernel_mmad.h"
#include "convolution_kernel_mmad_blocks.h" #include "convolution_kernel_mmad_blocks.h"
#include "convolution_kernel_mmad_1x1_gemm.h"
#include "convolution_kernel_imad_byxf_af32_depthwise.h" #include "convolution_kernel_imad_byxf_af32_depthwise.h"
#include "convolution_kernel_mmad_batched.h"
#include "convolution_kernel_bfyx_depthwise_weights_lwg.h" #include "convolution_kernel_bfyx_depthwise_weights_lwg.h"
#include "convolution_kernel_mmad_slm_2x14_rep4.h"
#include "convolution_kernel_mmad_slm_7x7_rep4.h"
#include "convolution_kernel_byxf_fs_bs_yx_bsv4_fsv32.h"
#include "convolution_kernel_mmad_batched_block.h"
#include "convolution_kernel_mmad_batched_block_1x1.h"
#include "convolution_kernel_mmad_32x32sg_128x128wg_slm_int8.h"
#include "convolution_kernel_mmad_32x32sg_224x128wg_slm_int8.h"
#include "convolution_kernel_mmad_32x32sg_slm_int8.h"
#include "convolution_kernel_byx8_f4__fs_bs_yx_bsv4_fsv32.h"
#include "convolution_kernel_imad.h" #include "convolution_kernel_imad.h"
#include "convolution_kernel_fs_byx_fsv32.h" #include "convolution_kernel_fs_byx_fsv32.h"
#include "convolution_kernel_fs_byx_fsv32_1x1.h" #include "convolution_kernel_fs_byx_fsv32_1x1.h"
@ -134,19 +123,6 @@ convolution_kernel_selector::convolution_kernel_selector() {
Attach<ConvolutionKernel_mmad_blocks>();
Attach<ConvolutionKernel_imad_byxf_af32_1x1>();
Attach<ConvolutionKernel_imad_byxf_af32_depthiwise>();
Attach<ConvolutionKernel_mmad_1x1_gemm>();
// fs_bs_yx_bsv4_fsv32 int8
Attach<ConvolutionKernel_mmad_batched>();
Attach<ConvolutionKernel_mmad_slm_2x14_rep4>();
Attach<ConvolutionKernel_mmad_slm_7x7_rep4>();
Attach<ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8>();
Attach<ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8>();
Attach<ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32>();
Attach<ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32>();
Attach<ConvolutionKernel_mmad_batched_block>();
Attach<ConvolutionKernel_mmad_batched_block_1x1>();
// Attach<ConvolutionKernel_mmad_32x32sg_slm_int8>();
// b_fs_yx_fsv4 kernels
Attach<ConvolutionKernel_imad>();

View File

@ -1,67 +0,0 @@
/*
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "convolution_grad_weights_kernel_1x1.h"
namespace kernel_selector {
ParamsKey ConvolutionGradWeightsKernel1x1::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F32);
k.EnableInputWeightsType(WeightsType::F32);
k.EnableOutputDataType(Datatype::F32);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::yxfb);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::byxf);
k.EnableSubGroup();
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBiasPerFeature();
k.EnableNonBiasTerm();
k.EnableMomentum();
k.EnableBatching();
k.EnableSplitSupport();
k.EnableGradient();
k.DisableTuning();
return k;
}
bool ConvolutionGradWeightsKernel1x1::Validate(const Params& p, const optional_params&) const {
const convolution_grad_weights_params& params = static_cast<const convolution_grad_weights_params&>(p);
if (params.filterSize.x != 1 || params.filterSize.y != 1)
return false;
return true;
}
ConvolutionGradWeightsKernelBase::DispatchData ConvolutionGradWeightsKernel1x1::SetDefault(
const convolution_grad_weights_params& params) const {
auto input_features = params.weights.IFM().v;
auto output_features = params.weights.OFM().v;
DispatchData kd;
kd.gws0 = 16;
kd.gws1 = input_features;
kd.gws2 = output_features;
kd.lws0 = 16;
kd.lws1 = 1;
kd.lws2 = 1;
kd.efficiency = FORCE_PRIORITY_8;
return kd;
}
} // namespace kernel_selector

View File

@ -1,32 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "convolution_grad_weights_kernel_base.h"
namespace kernel_selector {
class ConvolutionGradWeightsKernel1x1 : public ConvolutionGradWeightsKernelBase {
public:
ConvolutionGradWeightsKernel1x1() : ConvolutionGradWeightsKernelBase("convolution_grad_weights_1x1") {}
virtual ~ConvolutionGradWeightsKernel1x1() {}
DispatchData SetDefault(const convolution_grad_weights_params& params) const override;
bool Validate(const Params& p, const optional_params& o) const override;
ParamsKey GetSupportedKey() const override;
};
} // namespace kernel_selector

View File

@ -1,72 +0,0 @@
/*
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "convolution_grad_weights_kernel_3x3.h"
#include <algorithm>
namespace kernel_selector {
ParamsKey ConvolutionGradWeightsKernel3x3::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F32);
k.EnableInputWeightsType(WeightsType::F32);
k.EnableOutputDataType(Datatype::F32);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::yxfb);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::byxf);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBiasPerFeature();
k.EnableNonBiasTerm();
k.EnableMomentum();
k.EnableBatching();
k.EnableSplitSupport();
k.EnableGradient();
k.DisableTuning();
return k;
}
bool ConvolutionGradWeightsKernel3x3::Validate(const Params& p, const optional_params&) const {
const auto& params = static_cast<const convolution_grad_weights_params&>(p);
if (params.stride.x != 1 || params.stride.y != 1)
return false;
if (params.filterSize.x != 3 || params.filterSize.y != 3)
return false;
return true;
}
ConvolutionGradWeightsKernelBase::DispatchData ConvolutionGradWeightsKernel3x3::SetDefault(
const convolution_grad_weights_params& params) const {
auto input_features = params.weights.IFM().v;
auto output_features = params.weights.OFM().v;
DispatchData kd;
kd.gws0 = Align(output_features, 16);
kd.gws1 = input_features;
kd.gws2 = 1;
kd.lws0 = std::min(std::max(kd.gws0, static_cast<size_t>(1)), static_cast<size_t>(32));
while (kd.gws0 % kd.lws0 != 0) {
kd.lws0 -= 16;
}
kd.lws1 = 1;
kd.lws2 = 1;
kd.efficiency = FORCE_PRIORITY_8;
return kd;
}
} // namespace kernel_selector
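The shrink loop above relies on gws0 being aligned to 16, so stepping lws0 down by 16 always lands on a divisor. For a hypothetical OFM = 40: gws0 = Align(40, 16) = 48, lws0 starts at min(max(48, 1), 32) = 32, 48 % 32 != 0 drops it to 16, and 48 % 16 == 0 ends the loop:

// Illustrative check only, for the hypothetical OFM = 40 case above.
static_assert(48 % 32 != 0 && 48 % 16 == 0, "lws0 settles at 16 when gws0 = 48");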

View File

@ -1,32 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "convolution_grad_weights_kernel_base.h"
namespace kernel_selector {
class ConvolutionGradWeightsKernel3x3 : public ConvolutionGradWeightsKernelBase {
public:
ConvolutionGradWeightsKernel3x3() : ConvolutionGradWeightsKernelBase("convolution_grad_weights_3x3") {}
virtual ~ConvolutionGradWeightsKernel3x3() {}
DispatchData SetDefault(const convolution_grad_weights_params& params) const override;
bool Validate(const Params& p, const optional_params& o) const override;
ParamsKey GetSupportedKey() const override;
};
} // namespace kernel_selector

View File

@ -1,70 +0,0 @@
/*
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "convolution_grad_weights_kernel_7x7.h"
#include <algorithm>
namespace kernel_selector {
ParamsKey ConvolutionGradWeightsKernel7x7::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F32);
k.EnableInputWeightsType(WeightsType::F32);
k.EnableOutputDataType(Datatype::F32);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::yxfb);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::byxf);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBiasPerFeature();
k.EnableNonBiasTerm();
k.EnableMomentum();
k.EnableBatching();
k.EnableSplitSupport();
k.EnableGradient();
k.DisableTuning();
return k;
}
bool ConvolutionGradWeightsKernel7x7::Validate(const Params& p, const optional_params&) const {
const auto& params = static_cast<const convolution_grad_weights_params&>(p);
if (params.filterSize.x != 7 || params.filterSize.y != 7)
return false;
return true;
}
ConvolutionGradWeightsKernelBase::DispatchData ConvolutionGradWeightsKernel7x7::SetDefault(
const convolution_grad_weights_params& params) const {
auto input_features = params.weights.IFM().v;
auto output_features = params.weights.OFM().v;
DispatchData kd;
kd.gws0 = 8;
kd.gws1 = Align(output_features, 16);
kd.gws2 = input_features;
kd.lws0 = 1;
kd.lws1 = std::min(std::max(kd.gws1, static_cast<size_t>(1)), static_cast<size_t>(32));
while (kd.gws1 % kd.lws1 != 0) {
kd.lws1 -= 16;
}
kd.lws2 = 1;
kd.efficiency = FORCE_PRIORITY_8;
return kd;
}
} // namespace kernel_selector

View File

@ -1,32 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "convolution_grad_weights_kernel_base.h"
namespace kernel_selector {
class ConvolutionGradWeightsKernel7x7 : public ConvolutionGradWeightsKernelBase {
public:
ConvolutionGradWeightsKernel7x7() : ConvolutionGradWeightsKernelBase("convolution_grad_weights_7x7") {}
virtual ~ConvolutionGradWeightsKernel7x7() {}
DispatchData SetDefault(const convolution_grad_weights_params& params) const override;
bool Validate(const Params& p, const optional_params& o) const override;
ParamsKey GetSupportedKey() const override;
};
} // namespace kernel_selector

View File

@ -1,135 +0,0 @@
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "convolution_grad_weights_kernel_base.h"
#include "kernel_selector_utils.h"
#include <string>
#include <vector>
#include <algorithm>
namespace kernel_selector {
std::string convolution_grad_weights_params::to_string() const {
std::stringstream s;
s << base_params::to_string() << "_";
if (bias.empty()) {
s << "no_bias"
<< "_";
} else {
s << "bias_" << bias[0].PhysicalSize() << "_";
}
s << filterSize.x << "_" << filterSize.y << "_";
s << stride.x << "_" << stride.y << "_";
s << dilation.x << "_" << dilation.y << "_";
s << padding.x << "_" << padding.y << "_";
s << split;
return s.str();
}
JitConstants ConvolutionGradWeightsKernelBase::GetJitConstants(const convolution_grad_weights_params& cp) const {
JitConstants jit = training_kernel_base::GetJitConstants(cp);
const auto& padding = cp.padding;
const auto& input = cp.inputs[0];
int64_t input_offset_with_padding = (int64_t)input.GetFirstElementOffset() -
(cp.filterSize.x - 1 + padding.x) * input.X().pitch -
(cp.filterSize.y - 1 + padding.y) * input.Y().pitch;
input_offset_with_padding = std::max(input_offset_with_padding, (int64_t)0);
jit.AddConstants({
MakeJitConstant("STRIDE", cp.stride),
MakeJitConstant("PADDING", cp.padding),
MakeJitConstant("DILATION", cp.dilation),
MakeJitConstant("FILTER_ARRAY_NUM", cp.split),
MakeJitConstant("INPUT0_OFFSET_WITH_PADDING", input_offset_with_padding),
MakeJitConstant("DEPTHWISE_SEPARABLE_OPT", cp.depthwise_separable_opt),
MakeJitConstant("OUTPUT_GRAD_W", cp.output_grad_w),
});
return jit;
}
ConvolutionGradWeightsKernelBase::DispatchData ConvolutionGradWeightsKernelBase::SetDefault(
const convolution_grad_weights_params& params) const {
auto input_features = params.weights.IFM().v;
auto output_features = params.weights.OFM().v;
DispatchData kd;
kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
size_t gws0 = output_features * input_features;
size_t lws0 = std::min(gws0, static_cast<size_t>(32));
while (gws0 % lws0) {
lws0--;
}
kd.gws0 = gws0;
kd.gws1 = params.weights.X().v;
kd.gws2 = params.weights.Y().v;
kd.lws0 = lws0;
kd.lws1 = 1;
kd.lws2 = 1;
kd.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
return kd;
}
KernelsData ConvolutionGradWeightsKernelBase::GetKernelsData(const Params& params,
const optional_params& options) const {
assert(params.GetType() == KernelType::CONVOLUTION_GRAD_WEIGHTS);
if (!Validate(params, options)) {
return {};
}
const convolution_grad_weights_params& orgParams = static_cast<const convolution_grad_weights_params&>(params);
DispatchData runInfo = SetDefault(orgParams);
KernelData kd = KernelData::Default<convolution_grad_weights_params>(params);
convolution_grad_weights_params& newParams = *static_cast<convolution_grad_weights_params*>(kd.params.get());
bool succeed = UpdateWeightsParams(newParams, options, WeightsLayout::oiyx, kd.weightsReorderParams);
if (!succeed) {
return {};
}
auto cldnn_jit = GetJitConstants(orgParams);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel,
runInfo,
params.engineInfo,
kernelName,
jit,
entry_point,
DEFAULT,
true,
!orgParams.bias.empty());
if (newParams.use_momentum) {
kernel.arguments.push_back({ArgumentDescriptor::Types::PREV_WEIGHTS_GRADIENT, 0});
if (!newParams.bias.empty())
kernel.arguments.push_back({ArgumentDescriptor::Types::PREV_BIAS_GRADIENT, 0});
}
kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});
kernel.arguments.push_back({ArgumentDescriptor::Types::SPLIT, 0});
kernel.arguments.push_back({ArgumentDescriptor::Types::LEARNING_RATE, 0});
kd.estimatedTime = runInfo.efficiency;
return {kd};
}
} // namespace kernel_selector
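
To illustrate the INPUT0_OFFSET_WITH_PADDING value the deleted base class emits as a JIT constant: the first-element offset is shifted back by the filter and padding extents along X and Y and then clamped at zero. The pitch, padding, and filter numbers below are invented; only the formula and the clamp come from GetJitConstants() above.

// Standalone sketch of the padded input-offset computation (not library code).
#include <algorithm>
#include <cstdint>
#include <iostream>

int main() {
    const int64_t first_element_offset = 0;    // hypothetical tensor start offset
    const int64_t filter_x = 3, filter_y = 3;  // hypothetical 3x3 filter
    const int64_t pad_x = 1, pad_y = 1;        // hypothetical symmetric padding
    const int64_t x_pitch = 1, y_pitch = 16;   // hypothetical element pitches

    int64_t offset = first_element_offset
                   - (filter_x - 1 + pad_x) * x_pitch
                   - (filter_y - 1 + pad_y) * y_pitch;
    offset = std::max(offset, int64_t(0));     // never index before the buffer start

    std::cout << "INPUT0_OFFSET_WITH_PADDING = " << offset << "\n";  // prints 0 here
    return 0;
}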

View File

@ -1,79 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "training_kernel_base.h"
#include "kernel_selector_params.h"
#include <string>
namespace kernel_selector {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// convolution_grad_weights_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct convolution_grad_weights_params : public training_params {
convolution_grad_weights_params() : training_params(KernelType::CONVOLUTION_GRAD_WEIGHTS) {}
uSize filterSize;
uSize stride;
uSize dilation;
uSize padding;
uint32_t split = 1;
bool depthwise_separable_opt = false;
bool output_grad_w = false;
std::string to_string() const override;
ParamsKey GetParamsKey() const override {
ParamsKey k = training_params::GetParamsKey();
if (split > 1) {
k.EnableSplitSupport();
}
if (dilation.x != 1 || dilation.y != 1) {
k.EnableDilation();
}
if (depthwise_separable_opt) {
k.EnableDepthwiseSeparableOpt();
}
return k;
}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// convolution_grad_weights_optional_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct convolution_grad_weights_optional_params : training_optional_params {
convolution_grad_weights_optional_params() : training_optional_params(KernelType::CONVOLUTION_GRAD_WEIGHTS) {}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// ConvolutionGradWeightsKernelBase
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class ConvolutionGradWeightsKernelBase : public training_kernel_base {
public:
using training_kernel_base::training_kernel_base;
virtual ~ConvolutionGradWeightsKernelBase() {}
using DispatchData = CommonDispatchData;
protected:
virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const;
virtual JitConstants GetJitConstants(const convolution_grad_weights_params& params) const;
virtual DispatchData SetDefault(const convolution_grad_weights_params& params) const;
};
} // namespace kernel_selector

View File

@ -1,45 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "convolution_grad_weights_kernel_ref.h"
namespace kernel_selector {
ParamsKey ConvolutionGradWeightsKernelRef::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableInputWeightsType(WeightsType::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableInputLayout(DataLayout::yxfb);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableInputLayout(DataLayout::byxf);
k.EnableOutputLayout(DataLayout::yxfb);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::byxf);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBiasPerFeature();
k.EnableNonBiasTerm();
k.EnableMomentum();
k.EnableBatching();
k.EnableSplitSupport();
k.EnableGradient();
k.DisableTuning();
return k;
}
} // namespace kernel_selector

View File

@ -1,29 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "convolution_grad_weights_kernel_base.h"
namespace kernel_selector {
class ConvolutionGradWeightsKernelRef : public ConvolutionGradWeightsKernelBase {
public:
ConvolutionGradWeightsKernelRef() : ConvolutionGradWeightsKernelBase("convolution_grad_weights_ref") {}
virtual ~ConvolutionGradWeightsKernelRef() {}
ParamsKey GetSupportedKey() const override;
};
} // namespace kernel_selector

View File

@ -1,36 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "convolution_grad_weights_kernel_selector.h"
#include "convolution_grad_weights_kernel_ref.h"
#include "convolution_grad_weights_kernel_1x1.h"
#include "convolution_grad_weights_kernel_yxfb.h"
#include "convolution_grad_weights_kernel_3x3.h"
#include "convolution_grad_weights_kernel_7x7.h"
namespace kernel_selector {
convolution_grad_weights_kernel_selector::convolution_grad_weights_kernel_selector() {
Attach<ConvolutionGradWeightsKernelRef>();
Attach<ConvolutionGradWeightsKernel1x1>();
Attach<ConvolutionGradWeightsKernel_yxfb>();
Attach<ConvolutionGradWeightsKernel3x3>();
Attach<ConvolutionGradWeightsKernel7x7>();
}
KernelsData convolution_grad_weights_kernel_selector::GetBestKernels(const Params& params,
const optional_params& options) const {
return GetNaiveBestKernel(params, options, KernelType::CONVOLUTION_GRAD_WEIGHTS);
}
} // namespace kernel_selector

View File

@ -1,34 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "kernel_selector.h"
namespace kernel_selector {
class convolution_grad_weights_kernel_selector : public kernel_selector_base {
public:
static convolution_grad_weights_kernel_selector& Instance() {
static convolution_grad_weights_kernel_selector instance_;
return instance_;
}
convolution_grad_weights_kernel_selector();
virtual ~convolution_grad_weights_kernel_selector() {}
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};
} // namespace kernel_selector

View File

@ -1,74 +0,0 @@
/*
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "convolution_grad_weights_kernel_yxfb.h"
namespace kernel_selector {
ParamsKey ConvolutionGradWeightsKernel_yxfb::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F32);
k.EnableInputWeightsType(WeightsType::F32);
k.EnableOutputDataType(Datatype::F32);
k.EnableInputLayout(DataLayout::yxfb);
k.EnableOutputLayout(DataLayout::yxfb);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::byxf);
k.EnableSubGroup();
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBiasPerFeature();
k.EnableNonBiasTerm();
k.EnableMomentum();
k.EnableBatching();
k.EnableSplitSupport();
k.EnableGradient();
k.DisableTuning();
return k;
}
bool ConvolutionGradWeightsKernel_yxfb::Validate(const Params& p, const optional_params&) const {
const convolution_grad_weights_params& params = static_cast<const convolution_grad_weights_params&>(p);
auto batch = params.inputs[0].Batch().v;
if (batch % 16 != 0)
return false;
if (params.stride.x != 1 || params.stride.y != 1)
return false;
return true;
}
ConvolutionGradWeightsKernelBase::DispatchData ConvolutionGradWeightsKernel_yxfb::SetDefault(
const convolution_grad_weights_params& params) const {
auto input_features = params.weights.IFM().v;
auto output_features = params.weights.OFM().v;
auto x = params.weights.X().v;
auto y = params.weights.Y().v;
DispatchData kd;
kd.gws0 = 16;
kd.gws1 = input_features * output_features;
kd.gws2 = x * y;
kd.lws0 = 16;
kd.lws1 = 1;
kd.lws2 = 1;
kd.efficiency = FORCE_PRIORITY_7;
return kd;
}
} // namespace kernel_selector
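
A small standalone sketch of the checks that gated the deleted yxfb kernel above: it was only selected when the batch is a multiple of 16 and the stride is 1x1. ParamsSketch is a simplified stand-in for convolution_grad_weights_params.

// Standalone sketch of the yxfb grad-weights validation rules (not library code).
#include <cstddef>
#include <iostream>

struct ParamsSketch { size_t batch; size_t stride_x; size_t stride_y; };

static bool ValidateYxfb(const ParamsSketch& p) {
    if (p.batch % 16 != 0) return false;           // 16 batches handled per sub-group
    if (p.stride_x != 1 || p.stride_y != 1) return false;
    return true;
}

int main() {
    std::cout << ValidateYxfb({32, 1, 1}) << " " << ValidateYxfb({30, 1, 1}) << "\n";  // 1 0
    return 0;
}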

View File

@ -1,32 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "convolution_grad_weights_kernel_base.h"
namespace kernel_selector {
class ConvolutionGradWeightsKernel_yxfb : public ConvolutionGradWeightsKernelBase {
public:
ConvolutionGradWeightsKernel_yxfb() : ConvolutionGradWeightsKernelBase("convolution_grad_weights_yxfb") {}
virtual ~ConvolutionGradWeightsKernel_yxfb() {}
DispatchData SetDefault(const convolution_grad_weights_params& params) const override;
bool Validate(const Params& p, const optional_params& o) const override;
ParamsKey GetSupportedKey() const override;
};
} // namespace kernel_selector

View File

@ -35,7 +35,6 @@ ParamsKey DeconvolutionKernel_bfyx_opt::GetSupportedKey() const {
k.EnableBatching();
k.EnableSplitSupport();
k.EnableDepthwiseSeparableOpt();
- k.EnableGradient();
k.EnableGroupedConvolution();
return k;
}

View File

@ -55,7 +55,6 @@ ParamsKey DeconvolutionKernelRef::GetSupportedKey() const {
k.EnableBatching();
k.EnableSplitSupport();
k.EnableDepthwiseSeparableOpt();
- k.EnableGradient();
k.EnableGroupedConvolution();
k.EnableDifferentTypes();
k.EnableDifferentInputWeightsTypes();

View File

@ -222,9 +222,7 @@ KernelsData EltwiseKernel_b_fs_yx_fsv16::GetKernelsData(const Params& params, co
kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT);
kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(),
false,
- false,
- newParams.int8_quantization,
- newParams.output_calibration);
+ false);
kd.estimatedTime = runInfo.efficiency;

View File

@ -1,288 +0,0 @@
/*
// Copyright (c) 2019-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "eltwise_kernel_b_fs_yx_fsv4.h"
#include "kernel_selector_utils.h"
#include <string>
#include <vector>
namespace kernel_selector {
ParamsKey EltwiseKernel_b_fs_yx_fsv4::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableInputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableInputLayout(DataLayout::b_fs_yx_fsv4);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv4);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBatching();
k.EnableInt8Quantization();
k.EnableEltwiseStride();
return k;
}
EltwiseKernelBase::DispatchData EltwiseKernel_b_fs_yx_fsv4::SetDefault(const eltwise_params& params) const {
DispatchData kd;
// Because of the very specific requirements on the data, we can linearize it,
// i.e. use only one dimension, e.g. 'X'.
// GWS:
// we process 4*4 features per work item (4 int8 bytes per block_read4 read)
kd.gws0 = params.output.X().v * params.output.Y().v * params.output.Batch().v * params.output.Feature().v / (4 * 4);
kd.gws1 = 1;
kd.gws2 = 1;
// LWS:
kd.lws0 = 8;
kd.lws1 = 1;
kd.lws2 = 1;
kd.efficiency = FORCE_PRIORITY_1;
return kd;
}
bool EltwiseKernel_b_fs_yx_fsv4::Validate(const Params& params, const optional_params& options) const {
// Requirements for using the 'eltwise_b_fs_yx_fsv4' kernel are listed below:
// 1. No stride
// 2. All dimensions for all inputs are the same
// 3. No padding
// So, it can be linearized
if (!Parent::Validate(params, options)) {
return false;
}
KernelData kd = KernelData::Default<eltwise_params>(params);
eltwise_params& newParams = *static_cast<eltwise_params*>(kd.params.get());
// 1. No stride
if (!newParams.stride.empty()) {
return false;
}
for (size_t i = 0; i < newParams.inputs.size() - 1; i++) {
// 2. All dimensions for all inputs are the same
if (!(newParams.inputs[i] == newParams.inputs[i + 1])) {
return false;
}
}
const auto& in = newParams.inputs[0];
for (size_t i = 0; i < in.Dimentions(); i++) {
// 3. No padding
if ((in.GetDims()[i].pad.before != 0) || (in.GetDims()[i].pad.after != 0)) {
return false;
}
}
return true;
}
JitConstants EltwiseKernel_b_fs_yx_fsv4::GetJitConstants(const eltwise_params& params) const {
JitConstants jit = MakeBaseParamsJitConstants(params);
if (params.inputs[0].GetDType() == Datatype::UINT8) {
// Special handler for unsigned types
jit.AddConstants({MakeJitConstant("ELTW_UNSIGNED", 1)});
}
///////////////
jit.AddConstants({
MakeJitConstant("ELTWISE_LAYOUT_BASED", params.layoutBased),
MakeJitConstant("QUANTIZATION_TERM", params.int8_quantization),
});
if (params.int8_quantization) {
if (params.output_calibration) {
jit.AddConstant(MakeJitConstant("CALIBRATION_TERM", params.output_calibration));
jit.AddConstant(MakeJitConstant("O_QF", params.output_calibration_factors[0]));
} else {
jit.AddConstants({MakeJitConstant("O_QF", params.output_quantization_factor)});
}
}
std::string inputs_decls;
auto& updateInputs = params.updateInputIds;
for (size_t i = 0; i < params.inputs.size(); i++) {
// const should be added only to inputs which will not be updated
std::string const_str = "const";
for (size_t update_input_idx = 0; update_input_idx < updateInputs.size(); update_input_idx++) {
if (updateInputs[update_input_idx].inputId == i) {
const_str = "";
break;
}
}
inputs_decls +=
const_str + " __global " + toCLType(params.inputs[i].GetDType()) + "* input" + std::to_string(i) + ", ";
}
jit.AddConstant(MakeJitConstant("INPUTS_DECLS", inputs_decls));
jit.AddConstant(MakeJitConstant("ELTWISE_NO_PITCH_SAME_DIMS", CheckInputsOutputNoPitchSameDims(params)));
std::string do_eltwise;
auto& operations = params.operations;
auto& coefficients = params.coefficients;
for (size_t op_num = 0; op_num < operations.size(); op_num++) {
const std::string op_num_str = std::to_string(op_num);
const auto& ew = operations[op_num];
for (size_t input_idx = 0; input_idx < ew.inputs.size(); input_idx++) {
const auto& input = ew.inputs[input_idx];
const std::string name = "INPUT_" + op_num_str + "_" + std::to_string(input_idx);
switch (input.mode) {
case EltwiseInputMode::SCALAR:
jit.AddConstant(MakeJitConstant(name, input.scalar));
break;
case EltwiseInputMode::INPUT_BUFFER:
jit.AddConstant(MakeJitConstant(name,
"GET_INPUT(input" + std::to_string(input.index) + ", INPUT" +
std::to_string(input.index) + ")"));
break;
case EltwiseInputMode::OUTPUT_BUFFER:
jit.AddConstant(MakeJitConstant(name, "output[GET_INDEX(OUTPUT, )]"));
break;
case EltwiseInputMode::UNORDERED_ACCESS_INPUT_BUFFER:
jit.AddConstant(MakeJitConstant(
name,
"input" + std::to_string(input.index) + "[(size_t)tmp" + std::to_string(input.tmpIndex) + "]"));
break;
case EltwiseInputMode::INTERMEDIATE_RESULTS_INDEX:
jit.AddConstant(MakeJitConstant(name, "tmp" + std::to_string(input.tmpIndex)));
break;
default:
break;
}
}
std::string input0_str, input1_str, cast_type, op;
cast_type = "(int16)";
op = "const int16 tmp" + op_num_str + " = ";
input0_str = cast_type + "INPUT_" + op_num_str + "_0";
input1_str = cast_type + "INPUT_" + op_num_str + "_1";
if (ew.mode == EltwiseMode::ADD) {
std::vector<std::string> coeff_strings(ew.inputs.size(), "");
for (size_t input_idx = 0; input_idx < ew.inputs.size(); input_idx++) {
const auto& input = ew.inputs[input_idx];
if (input.mode == EltwiseInputMode::INPUT_BUFFER && input.index < coefficients.size()) {
const float c = coefficients[input.index];
if (c != 1.0f)
coeff_strings[input_idx] = cast_type + "(" + std::to_string(c) + ")*";
}
}
input0_str = coeff_strings[0] + input0_str;
input1_str = coeff_strings[1] + input1_str;
}
switch (ew.mode) {
case EltwiseMode::ADD:
op += input0_str + " + " + input1_str;
break;
case EltwiseMode::SUB:
op += input0_str + " - " + input1_str;
break;
case EltwiseMode::MUL:
op += input0_str + " * " + input1_str;
break;
case EltwiseMode::DIV:
op += input0_str + " / " + input1_str;
break;
case EltwiseMode::MODULU:
case EltwiseMode::MIN:
case EltwiseMode::MAX: {
auto mode = (ew.mode == EltwiseMode::MODULU ? "mod" : (ew.mode == EltwiseMode::MIN ? "min" : "max"));
auto input_0_type = params.inputs[0].GetDType();
auto input_1_type = params.inputs[1].GetDType();
// input_0 == int
if (input_0_type == kernel_selector::Datatype::INT8 ||
input_0_type == kernel_selector::Datatype::UINT8) {
// input_0 == int && input_1 == int
if (input_1_type == kernel_selector::Datatype::INT8 ||
input_1_type == kernel_selector::Datatype::UINT8) {
if (ew.mode == EltwiseMode::MODULU)
op += input0_str + " % " + input1_str;
else
op += cast_type + mode + "(" + input0_str + ", " + input1_str + ")";
// input_0 == int && input_1 != int
} else {
op += cast_type + "f" + mode + "(convert_float(" + input0_str + "), " + input1_str + ")";
}
// input_0 != int && input_1 == int
} else if (input_1_type == kernel_selector::Datatype::INT8 ||
input_1_type == kernel_selector::Datatype::UINT8) {
op += cast_type + "f" + mode + "(" + input0_str + ", convert_float(" + input1_str + "))";
// input_0 != int && input_1 != int
} else {
op += cast_type + "f" + mode + "(" + input0_str + ", " + input1_str + ")";
}
} break;
case EltwiseMode::POW:
op += cast_type + "pow(" + input0_str + ", " + input1_str + ")";
break;
case EltwiseMode::SQRT:
op += cast_type + "sqrt(" + input0_str + ")";
break;
case EltwiseMode::RSQRT:
op += cast_type + "1/sqrt(" + input0_str + ")";
break;
case EltwiseMode::ASSIGN:
op += input0_str;
break;
default:
break;
}
std::string opname = "OPERATION" + op_num_str;
jit.AddConstant(MakeJitConstant(opname, op));
do_eltwise += "\\\n\t" + opname + ";";
}
for (size_t update_input_idx = 0; update_input_idx < updateInputs.size(); update_input_idx++)
do_eltwise += "\\\n\tinput" + std::to_string(updateInputs[update_input_idx].inputId) + "[GET_INDEX(INPUT, " +
std::to_string(updateInputs[update_input_idx].inputId) + ")] = tmp" +
std::to_string(updateInputs[update_input_idx].tmpId) + ";";
do_eltwise += "\\\n\tres = tmp" + std::to_string(operations.size() - 1) + ";";
jit.AddConstant(MakeJitConstant("DO_ELTWISE", do_eltwise));
if (params.layoutBased || params.int8_quantization) {
jit.Merge(GetTensorFriendlyWorkGroupsJit(params.inputs[0]));
}
if (!params.stride.empty()) {
jit.AddConstant(MakeJitConstant("INPUT_STRIDED", 1));
}
///////////////
return jit;
}
KernelsData EltwiseKernel_b_fs_yx_fsv4::GetKernelsData(const Params& params, const optional_params& options) const {
return GetCommonKernelsData(params, options);
}
} // namespace kernel_selector
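
For reference, a standalone sketch of the JIT-string generation performed above: each eltwise operation becomes an OPERATION<n> macro and the macros are chained into DO_ELTWISE. Only the ADD/SUB/MUL arms are reproduced here, and the operand names are hard-coded stand-ins for the real per-input macros.

// Standalone sketch of OPERATION<n>/DO_ELTWISE macro-string assembly (not library code).
#include <iostream>
#include <string>
#include <vector>

enum class EltwiseMode { ADD, SUB, MUL };

int main() {
    const std::vector<EltwiseMode> operations = {EltwiseMode::ADD, EltwiseMode::MUL};
    std::string do_eltwise;

    for (size_t op_num = 0; op_num < operations.size(); op_num++) {
        const std::string n = std::to_string(op_num);
        // This layout always accumulates in int16 vectors.
        std::string op = "const int16 tmp" + n + " = ";
        const std::string in0 = "(int16)INPUT_" + n + "_0";
        const std::string in1 = "(int16)INPUT_" + n + "_1";
        switch (operations[op_num]) {
            case EltwiseMode::ADD: op += in0 + " + " + in1; break;
            case EltwiseMode::SUB: op += in0 + " - " + in1; break;
            case EltwiseMode::MUL: op += in0 + " * " + in1; break;
        }
        std::cout << "OPERATION" << n << " := " << op << "\n";
        do_eltwise += "\\\n\t" + std::string("OPERATION") + n + ";";
    }
    do_eltwise += "\\\n\tres = tmp" + std::to_string(operations.size() - 1) + ";";
    std::cout << "DO_ELTWISE :=" << do_eltwise << "\n";
    return 0;
}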

View File

@ -1,36 +0,0 @@
/*
// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "eltwise_kernel_base.h"
namespace kernel_selector {
class EltwiseKernel_b_fs_yx_fsv4 : public EltwiseKernelBase {
public:
using Parent = EltwiseKernelBase;
EltwiseKernel_b_fs_yx_fsv4() : EltwiseKernelBase("eltwise_b_fs_yx_fsv4") {}
virtual ~EltwiseKernel_b_fs_yx_fsv4() {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
protected:
bool Validate(const Params& params, const optional_params& options) const override;
JitConstants GetJitConstants(const eltwise_params& params) const override;
DispatchData SetDefault(const eltwise_params& params) const override;
};
} // namespace kernel_selector

View File

@ -51,17 +51,6 @@ static uint32_t GetNumberOfInputs(EltwiseMode m) {
ParamsKey eltwise_params::GetParamsKey() const {
ParamsKey k = base_params::GetParamsKey();
- if (int8_quantization) {
-     k.EnableInt8Quantization();
- }
- if (output_calibration) {
-     k.EnableOutputCalibration();
- }
- if (inputs_calibration) {
-     k.EnableEltwiseInputsCalibration();
- }
if (!stride.empty()) {
k.EnableEltwiseStride();
@ -617,9 +606,7 @@ KernelsData EltwiseKernelBase::GetCommonKernelsData(const Params& params, const
kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT);
kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(),
false,
- false,
- newParams.int8_quantization,
- newParams.output_calibration);
+ false);
kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;

View File

@ -84,14 +84,8 @@ struct eltwise_params : public base_params {
bool layoutBased = false;
bool int8_quantization = false;
- bool output_calibration = false;
- float output_quantization_factor = 1.0f;
- bool inputs_calibration = false;
bool broadcast = false;
- MultiDataTensor output_calibration_factors;
- MultiDataTensor inputs_calibration_factors;
- std::vector<float> input_quantization_factors;
virtual ParamsKey GetParamsKey() const;
};

View File

@ -1,301 +0,0 @@
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "eltwise_kernel_fs_bs_yx_bsv4_fsv32.h"
#include "kernel_selector_utils.h"
#include <string>
#include <vector>
namespace kernel_selector {
ParamsKey EltwiseKernel_fs_bs_yx_bsv4_fsv32::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBatching();
k.EnableInt8Quantization();
k.EnableEltwiseStride();
return k;
}
EltwiseKernelBase::DispatchData EltwiseKernel_fs_bs_yx_bsv4_fsv32::SetDefault(const eltwise_params& params) const {
DispatchData kd;
kd.gws0 = params.output.X().v;
kd.gws1 = params.output.Y().v;
// we process 4 batches and 4 features per workitem
kd.gws2 = (params.output.Batch().v / 4) * (params.output.Feature().v / 4);
kd.lws0 = 1;
kd.lws1 = 1;
kd.lws2 = 8;
kd.efficiency = FORCE_PRIORITY_3;
return kd;
}
JitConstants EltwiseKernel_fs_bs_yx_bsv4_fsv32::GetJitConstants(const eltwise_params& params) const {
JitConstants jit = MakeBaseParamsJitConstants(params);
const size_t in_x_pitch = 32 * 4;
const size_t in_y_pitch = 32 * 4 * params.inputs[0].X().LogicalDimPadded();
const size_t in_b_block_pitch = in_y_pitch * params.inputs[0].Y().LogicalDimPadded();
const size_t in_f_block_pitch = in_b_block_pitch * ((params.inputs[0].Batch().v + 3) / 4);
const size_t in_offset =
in_x_pitch * params.inputs[0].X().pad.before + in_y_pitch * params.inputs[0].Y().pad.before;
jit.AddConstant(MakeJitConstant("IN_X_PITCH", in_x_pitch));
jit.AddConstant(MakeJitConstant("IN_Y_PITCH", in_y_pitch));
jit.AddConstant(MakeJitConstant("IN_B_BLOCK_PITCH", in_b_block_pitch));
jit.AddConstant(MakeJitConstant("IN_F_BLOCK_PITCH", in_f_block_pitch));
jit.AddConstant(MakeJitConstant("IN_OFFSET", in_offset));
///////////////
jit.AddConstants({
MakeJitConstant("ELTWISE_LAYOUT_BASED", params.layoutBased),
MakeJitConstant("QUANTIZATION_TERM", params.int8_quantization),
});
if (params.int8_quantization) {
if (params.output_calibration) {
jit.AddConstant(MakeJitConstant("CALIBRATION_TERM", params.output_calibration));
jit.AddConstant(MakeJitConstant("O_QF", params.output_calibration_factors[0]));
} else {
jit.AddConstants({MakeJitConstant("O_QF", params.output_quantization_factor)});
}
}
std::string inputs_decls;
auto& updateInputs = params.updateInputIds;
for (size_t i = 0; i < params.inputs.size(); i++) {
// const should be added only to inputs which will not be updated
std::string const_str = "const";
for (size_t update_input_idx = 0; update_input_idx < updateInputs.size(); update_input_idx++) {
if (updateInputs[update_input_idx].inputId == i) {
const_str = "";
break;
}
}
inputs_decls +=
const_str + " __global " + toCLType(params.inputs[i].GetDType()) + "* input" + std::to_string(i) + ", ";
if (!params.stride.empty()) {
jit.AddConstant(MakeJitConstant("INPUT" + std::to_string(i) + "_STRIDE_X", params.stride[i].x));
jit.AddConstant(MakeJitConstant("INPUT" + std::to_string(i) + "_STRIDE_Y", params.stride[i].y));
}
}
jit.AddConstant(MakeJitConstant("INPUTS_DECLS", inputs_decls));
jit.AddConstant(MakeJitConstant("ELTWISE_NO_PITCH_SAME_DIMS", CheckInputsOutputNoPitchSameDims(params)));
std::string do_eltwise;
auto& operations = params.operations;
auto& coefficients = params.coefficients;
for (size_t op_num = 0; op_num < operations.size(); op_num++) {
const std::string op_num_str = std::to_string(op_num);
const auto& ew = operations[op_num];
for (size_t input_idx = 0; input_idx < ew.inputs.size(); input_idx++) {
const auto& input = ew.inputs[input_idx];
const std::string name = "INPUT_" + op_num_str + "_" + std::to_string(input_idx);
switch (input.mode) {
case EltwiseInputMode::SCALAR:
jit.AddConstant(MakeJitConstant(name, input.scalar));
break;
case EltwiseInputMode::INPUT_BUFFER:
jit.AddConstant(MakeJitConstant(name,
"GET_INPUT(input" + std::to_string(input.index) + ", INPUT" +
std::to_string(input.index) + ")"));
break;
case EltwiseInputMode::OUTPUT_BUFFER:
jit.AddConstant(MakeJitConstant(name, "output[GET_INDEX(OUTPUT, )]"));
break;
case EltwiseInputMode::UNORDERED_ACCESS_INPUT_BUFFER:
jit.AddConstant(MakeJitConstant(
name,
"input" + std::to_string(input.index) + "[(size_t)tmp" + std::to_string(input.tmpIndex) + "]"));
break;
case EltwiseInputMode::INTERMEDIATE_RESULTS_INDEX:
jit.AddConstant(MakeJitConstant(name, "tmp" + std::to_string(input.tmpIndex)));
break;
default:
break;
}
}
std::string input0_str, input1_str, cast_type, op;
if (params.int8_quantization) {
cast_type = "(int16)";
op = "const int16 tmp" + op_num_str + " = ";
} else {
cast_type = "(UNIT_TYPE)";
op = "const UNIT_TYPE tmp" + op_num_str + " = ";
}
input0_str = cast_type + "INPUT_" + op_num_str + "_0";
input1_str = cast_type + "INPUT_" + op_num_str + "_1";
if (ew.mode == EltwiseMode::ADD) {
std::vector<std::string> coeff_strings(ew.inputs.size(), "");
for (size_t input_idx = 0; input_idx < ew.inputs.size(); input_idx++) {
const auto& input = ew.inputs[input_idx];
if (input.mode == EltwiseInputMode::INPUT_BUFFER && input.index < coefficients.size()) {
const float c = coefficients[input.index];
if (c != 1.0f)
coeff_strings[input_idx] = cast_type + "(" + std::to_string(c) + ")*";
}
}
input0_str = coeff_strings[0] + input0_str;
input1_str = coeff_strings[1] + input1_str;
}
switch (ew.mode) {
case EltwiseMode::ADD:
op += input0_str + " + " + input1_str;
break;
case EltwiseMode::SUB:
op += input0_str + " - " + input1_str;
break;
case EltwiseMode::MUL:
op += input0_str + " * " + input1_str;
break;
case EltwiseMode::DIV:
op += input0_str + " / " + input1_str;
break;
case EltwiseMode::MODULU:
case EltwiseMode::MIN:
case EltwiseMode::MAX: {
auto mode = (ew.mode == EltwiseMode::MODULU ? "mod" : (ew.mode == EltwiseMode::MIN ? "min" : "max"));
auto input_0_type = params.inputs[0].GetDType();
auto input_1_type = params.inputs[1].GetDType();
// input_0 == int
if (input_0_type == kernel_selector::Datatype::INT8 ||
input_0_type == kernel_selector::Datatype::INT32 ||
input_0_type == kernel_selector::Datatype::INT64) {
// input_0 == int && input_1 == int
if (input_1_type == kernel_selector::Datatype::INT8 ||
input_1_type == kernel_selector::Datatype::INT32 ||
input_1_type == kernel_selector::Datatype::INT64) {
if (ew.mode == EltwiseMode::MODULU)
op += input0_str + " % " + input1_str;
else
op += cast_type + mode + "(" + input0_str + ", " + input1_str + ")";
// input_0 == int && input_1 != int
} else {
op += cast_type + "f" + mode + "(convert_float(" + input0_str + "), " + input1_str + ")";
}
// input_0 != int && input_1 == int
} else if (input_1_type == kernel_selector::Datatype::INT8 ||
input_1_type == kernel_selector::Datatype::INT32 ||
input_1_type == kernel_selector::Datatype::INT64) {
op += cast_type + "f" + mode + "(" + input0_str + ", convert_float(" + input1_str + "))";
// input_0 != int && input_1 != int
} else {
op += cast_type + "f" + mode + "(" + input0_str + ", " + input1_str + ")";
}
} break;
case EltwiseMode::POW:
op += cast_type + "pow(" + input0_str + ", " + input1_str + ")";
break;
case EltwiseMode::SQRT:
op += cast_type + "sqrt(" + input0_str + ")";
break;
case EltwiseMode::RSQRT:
op += cast_type + "1/sqrt(" + input0_str + ")";
break;
case EltwiseMode::SQUARED_DIFF:
op += cast_type + "((" + input0_str + " - " + input1_str +
")"
" * (" +
input0_str + " - " + input1_str + "))";
break;
case EltwiseMode::EQ:
op += cast_type + "(" + input0_str + " == " + input1_str + ")";
break;
case EltwiseMode::NE:
op += cast_type + "(" + input0_str + " != " + input1_str + ")";
break;
case EltwiseMode::LT:
op += cast_type + "(" + input0_str + " < " + input1_str + ")";
break;
case EltwiseMode::LE:
op += cast_type + "(" + input0_str + " <= " + input1_str + ")";
break;
case EltwiseMode::GT:
op += cast_type + "(" + input0_str + " > " + input1_str + ")";
break;
case EltwiseMode::GE:
op += cast_type + "(" + input0_str + " >= " + input1_str + ")";
break;
case EltwiseMode::LOGIC_AND:
op += cast_type + "(" + input0_str + " && " + input1_str + ")";
break;
case EltwiseMode::LOGIC_OR:
op += cast_type + "(" + input0_str + " || " + input1_str + ")";
break;
case EltwiseMode::LOGIC_XOR:
op += cast_type + "(!" + input0_str + " != !" + input1_str + ")";
break;
case EltwiseMode::FLOOR_MOD:
op += cast_type + "(" + input0_str + " - " + input0_str + " / " + input1_str + " * " + input1_str + ")";
break;
case EltwiseMode::ASSIGN:
op += input0_str;
break;
default:
break;
}
std::string opname = "OPERATION" + op_num_str;
jit.AddConstant(MakeJitConstant(opname, op));
do_eltwise += "\\\n\t" + opname + ";";
}
for (size_t update_input_idx = 0; update_input_idx < updateInputs.size(); update_input_idx++)
do_eltwise += "\\\n\tinput" + std::to_string(updateInputs[update_input_idx].inputId) + "[GET_INDEX(INPUT, " +
std::to_string(updateInputs[update_input_idx].inputId) + ")] = tmp" +
std::to_string(updateInputs[update_input_idx].tmpId) + ";";
do_eltwise += "\\\n\tres = tmp" + std::to_string(operations.size() - 1) + ";";
jit.AddConstant(MakeJitConstant("DO_ELTWISE", do_eltwise));
if (params.layoutBased || params.int8_quantization) {
jit.Merge(GetTensorFriendlyWorkGroupsJit(params.inputs[0]));
}
if (!params.stride.empty()) {
jit.AddConstant(MakeJitConstant("INPUT_STRIDED", 1));
}
///////////////
return jit;
}
KernelsData EltwiseKernel_fs_bs_yx_bsv4_fsv32::GetKernelsData(const Params& params,
const optional_params& options) const {
return GetCommonKernelsData(params, options);
}
} // namespace kernel_selector
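
To illustrate the pitch arithmetic the deleted bsv4_fsv32 kernel turned into JIT constants (32 features by 4 batches per block, ceil(Batch/4) batch blocks): the tensor extents and padding below are invented; the formulas are the ones in GetJitConstants() above.

// Standalone sketch of the fs_bs_yx_bsv4_fsv32 pitch computation (not library code).
#include <cstddef>
#include <iostream>

int main() {
    const size_t x_padded = 7, y_padded = 7;   // hypothetical padded spatial dims
    const size_t batch = 8;                    // hypothetical batch size
    const size_t pad_x_before = 0, pad_y_before = 0;

    const size_t in_x_pitch = 32 * 4;                                      // one x step = 32 features * 4 batches
    const size_t in_y_pitch = in_x_pitch * x_padded;                       // one y row
    const size_t in_b_block_pitch = in_y_pitch * y_padded;                 // one block of 4 batches
    const size_t in_f_block_pitch = in_b_block_pitch * ((batch + 3) / 4);  // one block of 32 features
    const size_t in_offset = in_x_pitch * pad_x_before + in_y_pitch * pad_y_before;

    std::cout << "IN_X_PITCH=" << in_x_pitch
              << " IN_Y_PITCH=" << in_y_pitch
              << " IN_B_BLOCK_PITCH=" << in_b_block_pitch
              << " IN_F_BLOCK_PITCH=" << in_f_block_pitch
              << " IN_OFFSET=" << in_offset << "\n";
    return 0;
}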

View File

@ -1,32 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "eltwise_kernel_base.h"
namespace kernel_selector {
class EltwiseKernel_fs_bs_yx_bsv4_fsv32 : public EltwiseKernelBase {
public:
EltwiseKernel_fs_bs_yx_bsv4_fsv32() : EltwiseKernelBase("eltwise_fs_bs_yx_bsv4_fsv32") {}
virtual ~EltwiseKernel_fs_bs_yx_bsv4_fsv32() {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
protected:
JitConstants GetJitConstants(const eltwise_params& params) const override;
DispatchData SetDefault(const eltwise_params& params) const override;
};
} // namespace kernel_selector

View File

@ -38,7 +38,6 @@ ParamsKey EltwiseKernelRef::GetSupportedKey() const {
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBatching();
- k.EnableInt8Quantization();
k.EnableEltwiseStride();
k.EnableEltwiseBroadcast();
return k;

View File

@ -16,8 +16,6 @@
#include "eltwise_kernel_selector.h"
#include "eltwise_kernel_ref.h"
#include "eltwise_kernel_vload8.h"
- #include "eltwise_kernel_fs_bs_yx_bsv4_fsv32.h"
- #include "eltwise_kernel_b_fs_yx_fsv4.h"
#include "eltwise_kernel_fs_b_yx_fsv32.h"
#include "eltwise_kernel_b_fs_yx_fsv16.h"
#include "eltwise_kernel_mixed_byxf_and_fs_b_yx_fsv32.h"
@ -26,8 +24,6 @@ namespace kernel_selector {
eltwise_kernel_selector::eltwise_kernel_selector() {
Attach<EltwiseKernelRef>();
Attach<EltwiseKernel_vload8>();
- Attach<EltwiseKernel_fs_bs_yx_bsv4_fsv32>();
- Attach<EltwiseKernel_b_fs_yx_fsv4>();
Attach<EltwiseKernel_fs_b_yx_fsv32>();
Attach<EltwiseKernel_mixed_byxf_and_fs_b_yx_fsv32>();
Attach<EltwiseKernel_b_fs_yx_fsv16>();

View File

@ -1,108 +0,0 @@
/*
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "embed_kernel_ref.h"
#include "kernel_selector_utils.h"
#include "common_tools.h"
#include <vector>
namespace kernel_selector {
ParamsKey EmbedKernelRef::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableInputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::INT8);
k.EnableInputWeightsType(WeightsType::F16);
k.EnableInputWeightsType(WeightsType::F32);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableAllInputLayout();
k.EnableOutputLayout(DataLayout::bf);
k.EnableBiasPerOutput();
k.EnableBiasPerFeature();
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBatching();
k.EnableNonBiasTerm();
return k;
}
JitConstants EmbedKernelRef::GetJitConstants(const embed_params& params) const {
JitConstants jit = WeightBiasKernelBase::GetJitConstants(params);
const auto& input = params.inputs[0];
const auto x_size = input.LogicalSize() / input.Batch().v;
const auto w_size = params.weights.OFM().v;
jit.AddConstant(MakeJitConstant("INPUT0_ELEMENTS_COUNT", x_size));
jit.AddConstant(MakeJitConstant("NUM_OUTPUT_SIZE", w_size));
return jit;
}
EmbedKernelRef::DispatchData EmbedKernelRef::SetDefault(const embed_params& params) const {
DispatchData kd;
std::vector<size_t> global = {params.inputs[0].X().v, params.weights.OFM().v, params.inputs[0].Batch().v};
std::vector<size_t> local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
kd.gws0 = global[0];
kd.gws1 = global[1];
kd.gws2 = global[2];
kd.lws0 = local[0];
kd.lws1 = local[1];
kd.lws2 = 1;
return kd;
}
KernelsData EmbedKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
assert(params.GetType() == KernelType::EMBED);
const embed_params& orgParams = static_cast<const embed_params&>(params);
DispatchData runInfo = SetDefault(orgParams);
KernelData kd = KernelData::Default<embed_params>(params);
embed_params& newParams = *static_cast<embed_params*>(kd.params.get());
bool succeed = UpdateWeightsParams(newParams, options, WeightsLayout::oiyx, kd.weightsReorderParams);
if (!succeed) {
return {};
}
auto cldnn_jit = GetJitConstants(newParams);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel,
runInfo,
params.engineInfo,
kernelName,
jit,
entry_point,
DEFAULT,
true,
!newParams.bias.empty());
kd.estimatedTime = runInfo.efficiency;
return {kd};
}
} // namespace kernel_selector
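
A brief standalone sketch of the sizes EmbedKernelRef derived above: the per-batch element count emitted as INPUT0_ELEMENTS_COUNT and the {X, OFM, Batch} global work size. The shapes below are invented values; the two formulas come from GetJitConstants() and SetDefault().

// Standalone sketch of the embed kernel's derived sizes (not library code).
#include <cstddef>
#include <iostream>

int main() {
    const size_t batch = 2, feature = 1, y = 1, x = 128;  // hypothetical input b/f/y/x
    const size_t weights_ofm = 256;                       // hypothetical embedding width

    const size_t logical_size = batch * feature * y * x;         // total element count
    const size_t input0_elements_count = logical_size / batch;   // per-batch count -> JIT constant
    const size_t gws[3] = {x, weights_ofm, batch};               // one work item per (x, output, batch)

    std::cout << "INPUT0_ELEMENTS_COUNT=" << input0_elements_count
              << "  NUM_OUTPUT_SIZE=" << weights_ofm
              << "  gws={" << gws[0] << "," << gws[1] << "," << gws[2] << "}\n";
    return 0;
}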

View File

@ -1,42 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "weight_bias_kernel_base.h"
#include "embed_params.h"
#include "common_kernel_base.h"
namespace kernel_selector {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// EmbedKernelRef
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class EmbedKernelRef : public WeightBiasKernelBase {
public:
EmbedKernelRef() : WeightBiasKernelBase("embed_ref") {}
virtual ~EmbedKernelRef() {}
struct DispatchData : public CommonDispatchData {};
ParamsKey GetSupportedKey() const override;
protected:
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
virtual JitConstants GetJitConstants(const embed_params& params) const;
virtual DispatchData SetDefault(const embed_params& params) const;
};
} // namespace kernel_selector

View File

@ -1,27 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "embed_kernel_selector.h"
#include "embed_kernel_ref.h"
namespace kernel_selector {
embed_kernel_selector::embed_kernel_selector() { Attach<EmbedKernelRef>(); }
KernelsData embed_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
return GetNaiveBestKernel(params, options, KernelType::EMBED);
}
} // namespace kernel_selector

View File

@ -1,35 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "kernel_selector.h"
namespace kernel_selector {
class embed_kernel_selector : public kernel_selector_base {
public:
static embed_kernel_selector& Instance() {
static embed_kernel_selector instance_;
return instance_;
}
embed_kernel_selector();
virtual ~embed_kernel_selector() {}
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};
} // namespace kernel_selector

View File

@ -1,51 +0,0 @@
/*
// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "weight_bias_params.h"
#include <string>
namespace kernel_selector {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// embed_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct embed_params : public weight_bias_params {
embed_params() : weight_bias_params(KernelType::EMBED) {}
std::string to_string() const {
std::stringstream s;
s << base_params::to_string() << "_";
if (bias.empty()) {
s << "no_bias"
<< "_";
} else {
s << "bias_" << bias[0].PhysicalSize() << "_";
}
return s.str();
}
virtual ParamsKey GetParamsKey() const { return weight_bias_params::GetParamsKey(); }
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// embed_optional_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct embed_optional_params : weight_bias_optional_params {
embed_optional_params() : weight_bias_optional_params(KernelType::EMBED) {}
};
} // namespace kernel_selector

View File

@ -29,8 +29,6 @@ JitConstants FullyConnectedKernelBase::GetJitConstants(const fully_connected_par
jit.AddConstant(MakeJitConstant("INPUT0_ELEMENTS_COUNT", x_size));
- jit.AddConstant(MakeJitConstant("QUANTIZATION_TERM", params.quantization != QuantizationType::NONE));
return jit;
}

View File

@ -1,122 +0,0 @@
// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fully_connected_kernel_mmad_batched.h"
namespace kernel_selector {
ParamsKey FullyConnected_mmad_batched::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
k.EnableOutputLayout(DataLayout::bf);
k.EnableBiasPerOutput();
k.EnableBiasPerFeature();
k.EnableNonBiasTerm();
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBatching();
k.EnableInt8Quantization();
k.EnableOutputCalibration();
return k;
}
bool FullyConnected_mmad_batched::Validate(const Params& p, const optional_params& o) const {
if (!FullyConnectedKernelBase::Validate(p, o)) {
return false;
}
const auto& params = static_cast<const fully_connected_params&>(p);
// we do not support padded input
if (params.inputs[0].X().pad.Total() != 0 || params.inputs[0].Y().pad.Total() != 0)
return false;
size_t batch = params.inputs[0].Batch().v;
// batch must be a multiple of 8
if (batch % 8 != 0) {
return false;
}
return true;
}
JitConstants FullyConnected_mmad_batched::GetJitConstants(const fully_connected_params& params,
const DispatchData& runInfo) const {
auto jit = Parent::GetJitConstants(params, runInfo);
jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws1));
// pitch for special block format used in this kernel
const size_t ifm_32_aligned = Align(params.weights.IFM().v, 32);
const size_t filter_ofm_block_pitch =
(ifm_32_aligned / 32) * params.weights.X().v * params.weights.Y().v * 4 * 8 * 8;
jit.AddConstant(MakeJitConstant("FILTER_OFM_BLOCK_PITCH", filter_ofm_block_pitch));
const size_t in_x_pitch = 32 * 4;
const size_t in_y_pitch = 32 * 4 * params.inputs[0].X().LogicalDimPadded();
const size_t in_b_block_pitch = in_y_pitch * params.inputs[0].Y().LogicalDimPadded();
const size_t in_f_block_pitch = in_b_block_pitch * ((params.inputs[0].Batch().v + 3) / 4);
const size_t in_offset =
in_x_pitch * params.inputs[0].X().pad.before + in_y_pitch * params.inputs[0].Y().pad.before;
jit.AddConstant(MakeJitConstant("IN_X_PITCH", in_x_pitch));
jit.AddConstant(MakeJitConstant("IN_Y_PITCH", in_y_pitch));
jit.AddConstant(MakeJitConstant("IN_B_BLOCK_PITCH", in_b_block_pitch));
jit.AddConstant(MakeJitConstant("IN_F_BLOCK_PITCH", in_f_block_pitch));
jit.AddConstant(MakeJitConstant("IN_OFFSET", in_offset));
return jit;
}
FullyConnected_mmad_batched::DispatchData FullyConnected_mmad_batched::SetDefault(const fully_connected_params& params,
int) const {
auto runInfo = Parent::SetDefault(params);
constexpr size_t sub_group_size = 8;
const auto of_maps = params.output.Feature().v;
const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size);
runInfo.gws0 = params.output.Batch().v / 8; // we process 8 batches in a single WG
runInfo.gws1 = of_threads_per_batch;
runInfo.gws2 = 1;
runInfo.lws0 = 1;
runInfo.lws1 = sub_group_size;
runInfo.lws2 = 1;
runInfo.efficiency = FORCE_PRIORITY_1;
return runInfo;
}
KernelsData FullyConnected_mmad_batched::GetKernelsData(const Params& params, const optional_params& options) const {
KernelsData res = {};
for (size_t i = 0; i < autoTuneOptions.size(); i++) {
KernelsData kd = GetTunedKernelsDataByIndex(params,
options,
DataLayout::fs_bs_yx_bsv4_fsv32,
WeightsLayout::os_is_yx_isa8_osv8_isv4,
FORCE_PRIORITY_1,
static_cast<int>(i));
if (!kd.empty()) {
res.emplace_back(kd[0]);
}
}
return res;
}
} // namespace kernel_selector
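
For reference, a standalone sketch of the block-pitch and dispatch math in the deleted batched MMAD kernel. RoundUp() is a simplified stand-in for the library helpers and the shape numbers are invented; the formulas mirror GetJitConstants() and SetDefault() above.

// Standalone sketch of the batched MMAD fully-connected sizing (not library code).
#include <cstddef>
#include <iostream>

static size_t RoundUp(size_t value, size_t multiple) {
    return ((value + multiple - 1) / multiple) * multiple;
}

int main() {
    const size_t ifm = 48, kx = 1, ky = 1;   // hypothetical weight shape
    const size_t ofm = 100;                  // hypothetical output features
    const size_t batch = 32;                 // batch must be a multiple of 8

    const size_t ifm_32_aligned = RoundUp(ifm, 32);  // pad IFM to the 32-wide block
    const size_t filter_ofm_block_pitch = (ifm_32_aligned / 32) * kx * ky * 4 * 8 * 8;

    const size_t sub_group_size = 8;
    const size_t gws0 = batch / 8;                    // 8 batches per work group
    const size_t gws1 = RoundUp(ofm, sub_group_size); // OFM threads per batch block

    std::cout << "FILTER_OFM_BLOCK_PITCH=" << filter_ofm_block_pitch
              << "  gws={" << gws0 << "," << gws1 << ",1}  lws={1," << sub_group_size << ",1}\n";
    return 0;
}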

View File

@ -1,36 +0,0 @@
// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fully_connected_kernel_base.h"
namespace kernel_selector {
class FullyConnected_mmad_batched : public FullyConnectedKernelBase {
public:
using Parent = FullyConnectedKernelBase;
FullyConnected_mmad_batched() : Parent("fully_connected_gpu_mmad_batched") {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
protected:
bool Validate(const Params& p, const optional_params& o) const override;
JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override;
DispatchData SetDefault(const fully_connected_params& params, int autoTuneIndex = -1) const override;
};
} // namespace kernel_selector

View File

@ -28,7 +28,6 @@
#include "fully_connected_kernel_fb_io_block.h"
#include "fully_connected_kernel_bf_io_input_spatial.h"
#include "fully_connected_kernel_mmad.h"
- #include "fully_connected_kernel_mmad_batched.h"
#include "fully_connected_kernel_imad.h"
#include "fully_connected_kernel_fs_byx_fsv32.h"
@ -49,7 +48,6 @@ fully_connected_kernel_selector::fully_connected_kernel_selector() {
Attach<FullyConnected_fb_io_b8_f8>();
Attach<FullyConnected_bf_io_input_spatial>();
Attach<FullyConnectedKernelMMAD>();
- // Attach<FullyConnected_mmad_batched>();
Attach<FullyConnectedKernelIMAD>();
Attach<FullyConnected_fs_byx_fsv32>();
}

View File

@ -1,82 +0,0 @@
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fully_connected_grad_input_kernel_base.h"
#include "kernel_selector_utils.h"
#include <vector>
#include <algorithm>
namespace kernel_selector {
JitConstants FullyConnectedGradInputKernelBase::GetJitConstants(const fully_connected_grad_input_params& params) const {
return WeightBiasKernelBase::GetJitConstants(params);
}
FullyConnectedGradInputKernelBase::DispatchData FullyConnectedGradInputKernelBase::SetDefault(
const fully_connected_grad_input_params& params) const {
DispatchData kd;
kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
size_t gws0 = params.output.Batch().v * params.weights.IFM().v;
size_t lws0 = std::min(gws0, static_cast<size_t>(32));
while (gws0 % lws0) {
lws0--;
}
kd.gws0 = gws0;
kd.gws1 = params.weights.X().v;
kd.gws2 = params.weights.Y().v;
kd.lws0 = lws0;
kd.lws1 = 1;
kd.lws2 = 1;
kd.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
return kd;
}
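The local-work-size loop above walks down from min(gws0, 32) until it finds an exact divisor, so lws0 always divides gws0. A small sketch of that search with a few checked cases (`pick_local_size` is an illustrative name, not part of the kernel base):

#include <algorithm>
#include <cassert>
#include <cstddef>

// Largest local size <= cap that evenly divides the global size,
// the same search the SetDefault above performs.
static std::size_t pick_local_size(std::size_t gws, std::size_t cap = 32) {
    std::size_t lws = std::min(gws, cap);
    while (gws % lws != 0) {
        --lws;
    }
    return lws;
}

int main() {
    assert(pick_local_size(96) == 32);   // 96 = 3 * 32
    assert(pick_local_size(50) == 25);   // 32..26 don't divide 50, 25 does
    assert(pick_local_size(7) == 7);     // gws smaller than the cap
}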
KernelsData FullyConnectedGradInputKernelBase::GetKernelsData(const Params& params,
const optional_params& options) const {
assert(params.GetType() == KernelType::FULLY_CONNECTED_GRAD_INPUT);
const fully_connected_grad_input_params& orgParams = static_cast<const fully_connected_grad_input_params&>(params);
DispatchData runInfo = SetDefault(orgParams);
KernelData kd = KernelData::Default<fully_connected_grad_input_params>(params);
fully_connected_grad_input_params& newParams = *static_cast<fully_connected_grad_input_params*>(kd.params.get());
bool succeed = UpdateWeightsParams(newParams, options, WeightsLayout::oi, kd.weightsReorderParams);
if (!succeed) {
return {};
}
auto cldnn_jit = GetJitConstants(orgParams);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel,
runInfo,
params.engineInfo,
kernelName,
jit,
entry_point,
DEFAULT,
true,
!orgParams.bias.empty());
kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});
kd.estimatedTime = runInfo.efficiency;
return {kd};
}
} // namespace kernel_selector

View File

@ -1,54 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "weight_bias_kernel_base.h"
#include "kernel_selector_params.h"
namespace kernel_selector {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// fully_connected_grad_input_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct fully_connected_grad_input_params : public weight_bias_params {
fully_connected_grad_input_params() : weight_bias_params(KernelType::FULLY_CONNECTED_GRAD_INPUT) {}
virtual ParamsKey GetParamsKey() const { return weight_bias_params::GetParamsKey(); }
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// fully_connected_grad_input_optional_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct fully_connected_grad_input_optional_params : weight_bias_optional_params {
fully_connected_grad_input_optional_params()
: weight_bias_optional_params(KernelType::FULLY_CONNECTED_GRAD_INPUT) {}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// FullyConnectedGradInputKernelBase
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class FullyConnectedGradInputKernelBase : public WeightBiasKernelBase {
public:
using WeightBiasKernelBase::WeightBiasKernelBase;
virtual ~FullyConnectedGradInputKernelBase() {}
using DispatchData = CommonDispatchData;
protected:
virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const;
virtual JitConstants GetJitConstants(const fully_connected_grad_input_params& params) const;
virtual DispatchData SetDefault(const fully_connected_grad_input_params& params) const;
};
} // namespace kernel_selector

View File

@ -1,44 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fully_connected_grad_input_kernel_ref.h"
namespace kernel_selector {
ParamsKey FullyConnectedGradInputKernelRef::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableInputWeightsType(WeightsType::F16);
k.EnableInputWeightsType(WeightsType::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableInputLayout(DataLayout::yxfb);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableInputLayout(DataLayout::byxf);
k.EnableOutputLayout(DataLayout::yxfb);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::byxf);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBiasPerFeature();
k.EnableNonBiasTerm();
// TODO: add support for batching; figure out a way to update weights/biases for multiple batches at the same time
k.EnableBatching();
k.EnableGradient();
k.DisableTuning();
return k;
}
} // namespace kernel_selector

View File

@ -1,29 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fully_connected_grad_input_kernel_base.h"
namespace kernel_selector {
class FullyConnectedGradInputKernelRef : public FullyConnectedGradInputKernelBase {
public:
FullyConnectedGradInputKernelRef() : FullyConnectedGradInputKernelBase("fully_connected_grad_input_gpu_ref") {}
virtual ~FullyConnectedGradInputKernelRef() {}
ParamsKey GetSupportedKey() const override;
};
} // namespace kernel_selector

View File

@ -1,28 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fully_connected_grad_input_kernel_selector.h"
#include "fully_connected_grad_input_kernel_ref.h"
namespace kernel_selector {
fully_connected_grad_input_kernel_selector::fully_connected_grad_input_kernel_selector() {
Attach<FullyConnectedGradInputKernelRef>();
}
KernelsData fully_connected_grad_input_kernel_selector::GetBestKernels(const Params& params,
const optional_params& options) const {
return GetNaiveBestKernel(params, options, KernelType::FULLY_CONNECTED_GRAD_INPUT);
}
} // namespace kernel_selector

View File

@ -1,34 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "kernel_selector.h"
namespace kernel_selector {
class fully_connected_grad_input_kernel_selector : public kernel_selector_base {
public:
static fully_connected_grad_input_kernel_selector& Instance() {
static fully_connected_grad_input_kernel_selector instance_;
return instance_;
}
fully_connected_grad_input_kernel_selector();
virtual ~fully_connected_grad_input_kernel_selector() {}
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};
} // namespace kernel_selector

View File

@ -1,93 +0,0 @@
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fully_connected_grad_weights_kernel_base.h"
#include "kernel_selector_utils.h"
#include <algorithm>
#include <vector>
namespace kernel_selector {
JitConstants FullyConnectedGradWeightsKernelBase::GetJitConstants(
const fully_connected_grad_weights_params& params) const {
JitConstants jit = training_kernel_base::GetJitConstants(params);
return jit;
}
FullyConnectedGradWeightsKernelBase::DispatchData FullyConnectedGradWeightsKernelBase::SetDefault(
const fully_connected_grad_weights_params& params) const {
DispatchData kd;
kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
size_t gws0 = params.weights.OFM().v * params.weights.IFM().v;
size_t lws0 = std::min(gws0, static_cast<size_t>(32));
while (gws0 % lws0) {
lws0--;
}
kd.gws0 = gws0;
kd.gws1 = params.weights.X().v;
kd.gws2 = params.weights.Y().v;
kd.lws0 = lws0;
kd.lws1 = 1;
kd.lws2 = 1;
kd.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
return kd;
}
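For a fully connected layer the weight tensor has 1x1 spatial dimensions, so the dispatch above reduces to one work-item per weight element. A worked example under assumed sizes (OFM=128, IFM=256, both illustrative):

#include <cstddef>

int main() {
    // Assumed fully connected layer: weights are OFM x IFM with 1x1 spatial dims.
    constexpr std::size_t ofm = 128, ifm = 256, x = 1, y = 1;

    constexpr std::size_t gws0 = ofm * ifm;   // one work-item per weight element
    constexpr std::size_t gws1 = x, gws2 = y;

    static_assert(gws0 == 32768, "one work-item per weight");
    // 32768 is a multiple of 32, so the divisor search above stops immediately at lws0 = 32.
    static_assert(gws0 % 32 == 0, "lws0 = 32");
    (void)gws1; (void)gws2;
}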
KernelsData FullyConnectedGradWeightsKernelBase::GetKernelsData(const Params& params,
const optional_params& options) const {
assert(params.GetType() == KernelType::FULLY_CONNECTED_GRAD_WEIGHTS);
const fully_connected_grad_weights_params& orgParams =
static_cast<const fully_connected_grad_weights_params&>(params);
DispatchData runInfo = SetDefault(orgParams);
KernelData kd = KernelData::Default<fully_connected_grad_weights_params>(params);
fully_connected_grad_weights_params& newParams =
*static_cast<fully_connected_grad_weights_params*>(kd.params.get());
bool succeed = UpdateWeightsParams(newParams, options, WeightsLayout::oi, kd.weightsReorderParams);
if (!succeed) {
return {};
}
auto cldnn_jit = GetJitConstants(orgParams);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel,
runInfo,
params.engineInfo,
kernelName,
jit,
entry_point,
DEFAULT,
true,
!orgParams.bias.empty());
if (orgParams.use_momentum) {
kernel.arguments.push_back({ArgumentDescriptor::Types::PREV_WEIGHTS_GRADIENT, 0});
if (!orgParams.bias.empty())
kernel.arguments.push_back({ArgumentDescriptor::Types::PREV_BIAS_GRADIENT, 0});
}
kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});
kernel.arguments.push_back({ArgumentDescriptor::Types::LEARNING_RATE, 0});
kd.estimatedTime = runInfo.efficiency;
return {kd};
}
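The trailing kernel arguments above depend on whether momentum is enabled. A minimal sketch of that conditional tail, using simplified stand-in types (`Arg`, `ArgType`, `append_tail`) in place of the real ArgumentDescriptor machinery:

#include <vector>

// Illustrative stand-ins for the ArgumentDescriptor types referenced above.
enum class ArgType { INPUT, PREV_WEIGHTS_GRADIENT, PREV_BIAS_GRADIENT, LEARNING_RATE };
struct Arg { ArgType type; int index; };

// Appends the trailing arguments the same way GetKernelsData does:
// optional previous gradients (momentum), the second input (incoming gradient),
// and the learning rate.
static void append_tail(std::vector<Arg>& args, bool use_momentum, bool has_bias) {
    if (use_momentum) {
        args.push_back({ArgType::PREV_WEIGHTS_GRADIENT, 0});
        if (has_bias)
            args.push_back({ArgType::PREV_BIAS_GRADIENT, 0});
    }
    args.push_back({ArgType::INPUT, 1});
    args.push_back({ArgType::LEARNING_RATE, 0});
}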
} // namespace kernel_selector

Some files were not shown because too many files have changed in this diff.