[IE CLDNN] Removed unused primitives and related structures (#1039)

Vladimir Paramuzov 2020-06-30 22:18:24 +03:00 committed by GitHub
parent 66f620f97e
commit c9d4e6b934
301 changed files with 58 additions and 31335 deletions

View File

@@ -30,7 +30,6 @@
#include <api/detection_output.hpp>
#include <api/normalize.hpp>
#include <api/reshape.hpp>
#include <api/batch_norm.hpp>
#include <api/permute.hpp>
#include <api/split.hpp>
#include <api/resample.hpp>
@@ -1533,49 +1532,11 @@ void Program::CreateBatchNormalizationPrimitive(cldnn::topology& topology, Infer
cldnn::primitive_id weightID = bnLayerName + "_" + m_scalesTag;
cldnn::primitive_id biasID = bnLayerName + "_" + m_biasesTag;
#define _SCALE_BN_OPT
#ifdef _SCALE_BN_OPT
// Using scale as an optimization (1 mad instead of mad+rsq)
// create new blobs for scale shift
CreateScaleWeightsAndBiasesFromBN(topology, bnLayer, weightID, biasID);
auto scalePrim = cldnn::scale(bnLayerName, inputPrimitives[0], weightID, biasID);
topology.add(scalePrim);
#else
cldnn::tensor blobTensor(0);
const auto bnDims = bnLayer->outData[0]->getTensorDesc().getDims();
switch (bnDims.size()) {
case 2:
blobTensor = cldnn::feature(TensorValue(bnDims[1]));
break;
case 4:
blobTensor = cldnn::feature(TensorValue(bnDims[1]));
break;
default:
THROW_CLDNN_EXCEPTION("Batch normalization input doesn't have 2 or 4 dimensions in " << bnLayer->name);
}
cldnn::layout blobLayout(
DataTypeFromPrecision(layer->precision),
m_defaultFormat,
blobTensor);
// Create variance primitive
cldnn::primitive_id varianceID = bnLayerName + "_" + m_weightsTag;
varianceID = CreatePrimitiveFromBlob(topology, varianceID, bnLayer->_weights, blobLayout);
// Create mean primitive
cldnn::primitive_id meanID = bnLayerName + "_" + m_biasesTag;
meanID = CreatePrimitiveFromBlob(topology, meanID, bnLayer->_biases, blobLayout);
auto bnPrim = cldnn::batch_norm(
bnLayerName,
inputPrimitives[0],
meanID,
varianceID,
bnLayer->epsilon);
topology.add(bnPrim);
#endif // _SCALE_BN_OPT
AddPrimitiveToProfiler(bnLayerName, layer);
}
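The surviving code path above folds inference-time batch normalization into a single cldnn::scale primitive, which is what the "1 mad instead of mad+rsq" comment refers to. A minimal standalone sketch of that folding with illustrative values (this is not the plugin's CreateScaleWeightsAndBiasesFromBN implementation):

// Standalone sketch: fold inference-time batch normalization into scale/shift,
// i.e. y = (x - mean) / sqrt(var + eps)  ==>  y = x * w + b  (one mad per element).
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
    const float eps = 1e-5f;
    std::vector<float> mean = {0.5f, -1.0f};   // per-channel mean
    std::vector<float> var  = {4.0f,  0.25f};  // per-channel variance
    std::vector<float> w(mean.size()), b(mean.size());
    for (size_t c = 0; c < mean.size(); ++c) {
        w[c] = 1.0f / std::sqrt(var[c] + eps);  // precomputed scale
        b[c] = -mean[c] * w[c];                 // precomputed shift
    }
    // Applying BN to x is now a single multiply-add per element.
    float x = 2.0f;
    std::printf("channel0: bn(x) = %f\n", x * w[0] + b[0]);
    return 0;
}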

View File

@@ -1,22 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
// dllmain.cpp : Defines the entry point for the DLL application.
#ifdef _WIN32
#include <windows.h>
BOOL APIENTRY DllMain(HMODULE hModule,
DWORD ul_reason_for_call,
LPVOID lpReserved) {
switch (ul_reason_for_call) {
case DLL_PROCESS_ATTACH:
case DLL_THREAD_ATTACH:
case DLL_THREAD_DETACH:
case DLL_PROCESS_DETACH:
break;
}
return TRUE;
}
#endif

View File

@@ -71,13 +71,6 @@ enum class activation_func {
gelu // (0.5*val*(1 + erf(val / sqrt(2)))
};
/// @brief activation gradient functions
enum class activation_grad_func {
none, // val
relu, // val * (input > 0)
relu_negative_slope, // val * ((input > 0) + a * (input <= 0) (a is additional param)
};
/// @brief activation additional params
struct activation_additional_params {
float a, b;

View File

@@ -1,96 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include "activation.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Activation gradient for rectified linear unit or parameterized rectified linear unit.
/// @par Algorithm:
/// out(i,x,y) = input_gradient(i,x,y) * ((input(i,x,y) > 0) + slope(i) * (input(i,x,y) <= 0)
/// @par Where:
/// @li out(i,x,y) : value at x, y from i-th feature map after activation.
/// @li in(i,x,y) : value at x, y from i-th feature map before activation.
/// @li slope(i) : the slope value of the i-th feature map (can be shared across channels or one slope per channel).
struct activation_grad : public primitive_base<activation_grad> {
CLDNN_DECLARE_PRIMITIVE(activation_grad)
/// @brief Constructs Relu grad primitive.
/// @param id This primitive id.
/// @param input_grad Input gradient primitive id.
/// @param input Input primitive id.
/// @param activation_grad_func activation_grad function.
/// @param additional_params additional params (slope).
activation_grad(const primitive_id& id,
const primitive_id& input_grad,
const primitive_id& input,
activation_grad_func activation_grad_function,
activation_additional_params additional_params = {0.f, 0.f},
const padding& output_padding = padding())
: primitive_base(id, {input_grad, input}, output_padding),
activation_grad_function(activation_grad_function),
additional_params(additional_params),
additional_params_input("") {}
/// @brief Constructs Relu grad primitive.
/// @param id This primitive id.
/// @param input_grad Input gradient primitive id.
/// @param input Input primitive id.
/// @param activation_grad_func activation_grad function.
/// @param additional_params additional params (slope).
activation_grad(const primitive_id& id,
const primitive_id& input_grad,
const primitive_id& input,
const primitive_id& additional_params_input,
activation_grad_func activation_grad_function,
const padding& output_padding = padding())
: primitive_base(id, {input_grad, input}, output_padding),
activation_grad_function(activation_grad_function),
additional_params({0, 0}),
additional_params_input(additional_params_input) {}
/// @brief activation_grad function.
activation_grad_func activation_grad_function;
/// @brief activation_grad additional params.
activation_additional_params additional_params;
/// @brief PRelu activation slope input primitive id.
/// Input x dimension should be equal to input feature size (one slope per channel).
/// All other dimensions should be 1.
primitive_id additional_params_input;
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
if (additional_params_input.empty())
return {};
return {additional_params_input};
}
};
/// @}
/// @}
/// @}
} // namespace cldnn
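The removed activation_grad primitive documents its formula in the comment above. A minimal standalone sketch of that formula with illustrative values (not clDNN API code):

// Standalone sketch of the documented ReLU-gradient formula:
// out(i,x,y) = input_gradient(i,x,y) * ((input(i,x,y) > 0) + slope(i) * (input(i,x,y) <= 0))
#include <cstdio>

static float relu_grad(float input_grad, float input, float slope) {
    return input_grad * ((input > 0.0f ? 1.0f : 0.0f) + slope * (input <= 0.0f ? 1.0f : 0.0f));
}

int main() {
    std::printf("%f\n", relu_grad(0.5f, 2.0f, 0.1f));   // positive input: 0.5
    std::printf("%f\n", relu_grad(0.5f, -3.0f, 0.1f));  // negative input: 0.5 * 0.1 = 0.05
    return 0;
}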

View File

@@ -1,111 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Apply Adam primitive.
/// @details Updates output using Adam algorithm. The output of this primitive should be mutable_data type in case user wants to update
/// variable accross network. If output is not mutable_data then it will be initialized with 0.
/// "Adam: A Method for Stochastic Optimization" by Diederik P. Kingma, Jimmy Ba
/// @n See: https://arxiv.org/abs/1412.6980
///
/// <b>Algorithm:</b>
/// @n float lr[t] = lr * sqrt(1 - beta2^t) / (1 - beta1^t);
/// @n float m[t] = beta1 * m[t-1] + (1 - beta1) * grad[t];
/// @n float v[t] = beta2 * v[t-1] + (1 - beta2) * grad[t] * grad[t];
/// @n float result = result - lr[t] * m[t] / (sqrt(v[t]) + epsilon);
struct apply_adam : public primitive_base<apply_adam> {
CLDNN_DECLARE_PRIMITIVE(apply_adam)
/// @brief Constructs apply Adam primitive.
/// @param id This primitive id.
/// @param input Input gradient primitive id.
/// @param m Primitive id containing mean data.
/// @param v Primitive id containing variance.
/// @param beta1_power Primitive id containing beta1^t.
/// @param beta2_power Primitive id containing beta2^t.
/// @param lr Learning rate parameter.
/// @param beta1 Beta1 parameter.
/// @param beta2 Beta2 parameter.
/// @param epsilon Epsilon.
/// @param dependency_id Optional primitive id that need to complete before execution of this primitive. Used only for synchronization.
apply_adam(const primitive_id& id,
const primitive_id& input,
const primitive_id& m,
const primitive_id& v,
const primitive_id& beta1_power,
const primitive_id& beta2_power,
float lr,
float beta1,
float beta2,
float epsilon,
const primitive_id& dependency_id = "",
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
m(m),
v(v),
beta1_power(beta1_power),
beta2_power(beta2_power),
lr(lr),
beta1(beta1),
beta2(beta2),
epsilon(epsilon),
dependency_id(dependency_id) {}
/// @brief Primitive id containing m data.
primitive_id m;
/// @brief Primitive id containing v data.
primitive_id v;
/// @brief Primitive id containing beta1^t.
primitive_id beta1_power;
/// @brief Primitive id containing beta2^t.
primitive_id beta2_power;
/// @brief Learning rate parameter.
float lr;
/// @brief Beta1 parameter.
float beta1;
/// @brief Beta2 parameter.
float beta2;
/// @brief Epsilon.
float epsilon;
/// @brief Optional primitive id that need to complete before execution of this primitive. Used only for synchronization.
primitive_id dependency_id;
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
std::vector<std::reference_wrapper<const primitive_id>> ret{m, v, beta1_power, beta2_power};
ret.reserve(!dependency_id.empty());
if (!dependency_id.empty())
ret.push_back(dependency_id);
return ret;
}
};
/// @}
/// @}
/// @}
} // namespace cldnn
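The removed apply_adam primitive documents its update rule above. A minimal standalone sketch of those four equations for a single scalar parameter, with illustrative values (not clDNN API code):

// Standalone sketch of the Adam update documented above (single scalar parameter).
#include <cmath>
#include <cstdio>

int main() {
    float lr = 0.001f, beta1 = 0.9f, beta2 = 0.999f, epsilon = 1e-8f;
    float m = 0.0f, v = 0.0f, result = 1.0f;  // optimizer state and parameter
    float grad = 0.2f;                        // gradient for each step
    for (int t = 1; t <= 3; ++t) {
        float lr_t = lr * std::sqrt(1.0f - std::pow(beta2, t)) / (1.0f - std::pow(beta1, t));
        m = beta1 * m + (1.0f - beta1) * grad;
        v = beta2 * v + (1.0f - beta2) * grad * grad;
        result = result - lr_t * m / (std::sqrt(v) + epsilon);
        std::printf("t=%d result=%f\n", t, result);
    }
    return 0;
}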

View File

@@ -1,184 +0,0 @@
/*
// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Batch normalization primitive.
/// @details Performs batch normalization as discribed in
/// "Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift" by Ioffe, Szegedy
/// @n See: http://arxiv.org/abs/1502.03167
///
/// <b>Algorithm:</b>
/// @n global stats can be computed as:
/// @n out[i] = ( (in[i] - mean[b]) / sqrt(variance[b] + epsilon) ) * scale[b] + shift[b]
struct batch_norm : public primitive_base<batch_norm> {
CLDNN_DECLARE_PRIMITIVE(batch_norm)
/// @brief Constructs batch normalization primitive.
/// @param id This primitive id.
/// @param input Input primitive id.
/// @param mean Primitive id containing mean data.
/// @param variance Primitive id containing variance.
/// @param epsilon Epsilon.
batch_norm(const primitive_id& id,
const primitive_id& input,
const primitive_id& mean,
const primitive_id& variance,
float epsilon,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
mean(mean),
variance(variance),
inv_variance(""),
epsilon(epsilon) {}
/// @brief Constructs batch normalization primitive.
/// @param id This primitive id.
/// @param input Input primitive id.
/// @param mean Primitive id containing mean data.
/// @param variance Primitive id containing variance.
/// @brief scale Primitive id containing scale.
/// @brief shift Primitive id containing shift.
/// @param epsilon Epsilon.
batch_norm(const primitive_id& id,
const primitive_id& input,
const primitive_id& mean,
const primitive_id& variance,
const primitive_id& scale,
const primitive_id& shift,
float epsilon,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
mean(mean),
variance(variance),
scale(scale),
shift(shift),
inv_variance(""),
epsilon(epsilon) {}
/// @brief Constructs batch normalization primitive with mean and variance calculation (used for training).
/// @param id This primitive id.
/// @param input Input primitive id.
/// @param epsilon Epsilon.
/// @param inv_variance Primitive id containing inverted variance calculated in this primitive. For inference leave empty.
batch_norm(const primitive_id& id,
const primitive_id& input,
float epsilon,
const primitive_id& inv_variance = "",
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
mean(""),
variance(""),
inv_variance(inv_variance),
epsilon(epsilon) {}
/// @brief Constructs batch normalization primitive with mean and variance calculation (used for training).
/// @param id This primitive id.
/// @param input Input primitive id.
/// @brief scale Primitive id containing scale.
/// @brief shift Primitive id containing shift.
/// @param epsilon Epsilon.
/// @param inv_variance Primitive id containing inverted variance calculated in this primitive. For inference leave empty.
batch_norm(const primitive_id& id,
const primitive_id& input,
float epsilon,
const primitive_id& scale,
const primitive_id& shift,
const primitive_id& inv_variance = "",
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
mean(""),
variance(""),
scale(scale),
shift(shift),
inv_variance(inv_variance),
epsilon(epsilon) {}
/// @brief Constructs batch normalization primitive with mean and variance calculation (used for training).
/// @param id This primitive id.
/// @param input Input primitive id.
/// @brief scale Primitive id containing scale.
/// @brief shift Primitive id containing shift.
/// @brief mean_out Primitive id containing mean output.
/// @brief variance_out Primitive id containing variance output.
/// @param epsilon Epsilon.
/// @param inv_variance Primitive id containing inverted variance calculated in this primitive. For inference leave empty.
batch_norm(const primitive_id& id,
const primitive_id& input,
float epsilon,
const primitive_id& mean_out,
const primitive_id& variance_out,
const primitive_id& scale,
const primitive_id& shift,
const primitive_id& inv_variance = "",
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
mean(mean_out),
variance(variance_out),
scale(scale),
shift(shift),
inv_variance(inv_variance),
epsilon(epsilon) {}
/// @brief Primitive id containing mean data.
primitive_id mean;
/// @brief Primitive id containing variance.
primitive_id variance;
/// @brief Primitive id containing scale.
primitive_id scale;
/// @brief Primitive id containing shift.
primitive_id shift;
/// @brief Primitive id containing inverted variance used in future gradient computing.
primitive_id inv_variance;
/// @brief Epsilon.
float epsilon;
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
std::vector<std::reference_wrapper<const primitive_id>> deps;
if (!mean.empty() && !variance.empty()) {
deps.push_back(mean);
deps.push_back(variance);
}
if (!scale.empty() && !shift.empty()) {
deps.push_back(scale);
deps.push_back(shift);
}
if (!inv_variance.empty())
deps.push_back(inv_variance);
return deps;
}
};
/// @}
/// @}
/// @}
} // namespace cldnn
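The removed batch_norm primitive documents its inference formula above. A minimal standalone sketch of that formula for one feature map, with illustrative values (not clDNN API code):

// Standalone sketch of the documented inference formula:
// out[i] = ((in[i] - mean[f]) / sqrt(variance[f] + epsilon)) * scale[f] + shift[f]
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
    const float epsilon = 1e-5f;
    std::vector<float> in = {1.0f, 2.0f, 3.0f};
    float mean = 2.0f, variance = 0.5f, scale = 1.5f, shift = 0.1f;  // one feature map
    for (float x : in) {
        float out = ((x - mean) / std::sqrt(variance + epsilon)) * scale + shift;
        std::printf("%f -> %f\n", x, out);
    }
    return 0;
}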

View File

@@ -1,61 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Performs backward batch normalization layer.
/// @details Calculates mean gradient and gradient * input for every feature in data,
/// then output is calculated as inv_variance * (input_grad - mean_grad_input * input - mean_grad)
struct batch_norm_grad : public primitive_base<batch_norm_grad> {
CLDNN_DECLARE_PRIMITIVE(batch_norm_grad)
/// @brief Constructs batch normalization backward layer.
/// @param id This primitive id.
/// @param input_grad Input gradient primitive id.
/// @param input Input primitive id.
/// @param inv_variance Primitive id containing inverted variance from forward pass.
batch_norm_grad(
const primitive_id& id,
const primitive_id& input_grad,
const primitive_id& input,
const primitive_id& inv_variance,
const padding& output_padding = padding())
: primitive_base(id, {input_grad, input}, output_padding), inv_variance(inv_variance) {
}
/// @brief Primitive id containing inverted variance from forward pass.
primitive_id inv_variance;
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
return {inv_variance};
}
};
/// @}
/// @}
/// @}
} // namespace cldnn
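The removed batch_norm_grad primitive describes its output above. A minimal standalone sketch of one reading of that description for a single feature map, where mean_grad and mean_grad_input are taken as per-feature means of the gradient and of gradient times input; that interpretation is an assumption, not something the header spells out:

// Standalone sketch of the documented output:
// out = inv_variance * (input_grad - mean_grad_input * input - mean_grad)
#include <cstdio>
#include <vector>

int main() {
    std::vector<float> input      = {1.0f, -2.0f, 0.5f, 3.0f};
    std::vector<float> input_grad = {0.1f,  0.2f, -0.1f, 0.3f};
    float inv_variance = 0.8f;
    float mean_grad = 0.0f, mean_grad_input = 0.0f;  // per-feature means (assumed)
    for (size_t i = 0; i < input.size(); ++i) {
        mean_grad       += input_grad[i];
        mean_grad_input += input_grad[i] * input[i];
    }
    mean_grad       /= input.size();
    mean_grad_input /= input.size();
    for (size_t i = 0; i < input.size(); ++i) {
        float out = inv_variance * (input_grad[i] - mean_grad_input * input[i] - mean_grad);
        std::printf("%f\n", out);
    }
    return 0;
}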

View File

@@ -1,95 +0,0 @@
// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Select mode for the @ref contract layer.
enum class contract_mode : int32_t {
/// @brief Sum reduction.
sum,
/// @brief Product reduction.
prod,
/// @brief All reduction.
all,
/// @brief Any reduction.
any,
/// @brief Max reduction.
max
};
/// @brief Reduces input with an operation defined by @p mode along defined
/// by @p reduction_axes dimensions.
///
/// @details Reduces the input using the binary operation determined by
/// @p mode. The @p reduction_axes determine the final shape of the
/// output, which is calculated based on the input shape by
/// collapsing the dimensions along which the reduction happens.
/// For example, for the input with
/// @n <tt>input_sizes = (in_b, in_f, in_y, in_x)</tt>
/// @n a reduction with
/// @n <tt>reduction_axes = (2)</tt>
/// @n would collapse the Y dimension, producing
/// @n <tt>output_shape = (1, in_b, in_f, in_x)</tt>
/// @n where every element is a @p mode reduction of the input elements with
/// @n the same B, F and X coordinates.
/// @n
/// @n@b Requirements:
/// @n - @p reduction_axes size (dimensions count) must be within (inclusive) range
/// 1 - 4.
/// @n - @p reduction_axes mustn't have duplicate values.
/// @n - Values of @p reduction_axes must be within (inclusive) range 0 - 3
/// @n Breaking any of these conditions will raise an exception.
struct contract : public primitive_base<contract> {
CLDNN_DECLARE_PRIMITIVE(contract)
/// @brief Constructs contract primitive / layer.
///
/// @param id An identifier of new primitive.
/// @param input An identifier of primitive which is an input for newly created
/// contract primitive.
/// @param mode Reduction mode.
/// @param reduction_axes Axes positions (0-based, from left to right) in input_shape
/// that are being reduced.
/// @param output_padding Optional padding for output from primitive.
contract(
const primitive_id& id,
const primitive_id& input,
contract_mode mode,
const std::vector<uint16_t>& reduction_axes = {},
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
mode(mode),
reduction_axes(reduction_axes) {
}
/// @param mode Contract mode.
contract_mode mode;
/// @brief Array of axes positions from input shape (0-based, from left to right)
/// along which reduction should happen.
std::vector<uint16_t> reduction_axes;
};
/// @}
/// @}
/// @}
} // namespace cldnn
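The removed contract primitive documents Y-axis reduction above. A minimal standalone sketch of a sum reduction over the Y axis of a small bfyx buffer, with illustrative sizes (not clDNN API code):

// Standalone sketch of the documented reduction: collapsing the Y axis of a
// (b, f, y, x) tensor with a sum, so every output element is the sum over Y
// of the inputs sharing the same b, f and x coordinates.
#include <cstdio>
#include <vector>

int main() {
    const int B = 1, F = 2, Y = 3, X = 2;
    std::vector<float> in(B * F * Y * X);
    for (size_t i = 0; i < in.size(); ++i) in[i] = static_cast<float>(i);  // bfyx layout

    std::vector<float> out(B * F * X, 0.0f);  // Y collapsed
    for (int b = 0; b < B; ++b)
        for (int f = 0; f < F; ++f)
            for (int y = 0; y < Y; ++y)
                for (int x = 0; x < X; ++x)
                    out[(b * F + f) * X + x] += in[((b * F + f) * Y + y) * X + x];

    for (float v : out) std::printf("%f ", v);
    std::printf("\n");
    return 0;
}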

View File

@@ -1,95 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "deconvolution.hpp"
#include "primitive.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Performs backward convolution operation for input.
/// @details convolution_grad_input is similar to deconvolution layer without biases and activation support.
/// It actually uses deconvolution primitive underneath with gradient bool set to true.
struct convolution_grad_input : public deconvolution {
/// @brief Constructs convolution_grad_input primitive.
/// @param id This primitive id.
/// @param input Input primitive id.
/// @param weights List of primitive ids containing weights data.
/// @param input_offset Defines a shift, relative to (0,0) position of the input buffer,
/// where (0,0) point of the convolution_grad_input window should start calculations.
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
/// @param with_activation Enables Relu activation.
/// @param activation_slp Relu activation slope.
convolution_grad_input(const primitive_id& id,
const primitive_id& input,
const std::vector<primitive_id>& weights,
tensor stride = {1, 1, 1, 1},
tensor input_offset = {0, 0, 0, 0},
const padding& output_padding = padding())
: deconvolution(id, input, {weights}, stride, input_offset, output_padding, true) {}
/// @brief Constructs convolution_grad_input primitive (computes input paddings to match output size).
/// @param id This primitive id.
/// @param input Input primitive id.
/// @param weights List of primitive ids containing weights data.
/// @param input_offset Defines a shift, relative to (0,0) position of the input buffer,
/// where (0,0) point of the convolution_grad_input window should start calculations.
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
/// @param with_activation Enables Relu activation.
/// @param activation_slp Relu activation slope.
/// @param output_size User-defined output data size of the primitive (w/o padding).
convolution_grad_input(const primitive_id& id,
const primitive_id& input,
const std::vector<primitive_id>& weights,
tensor stride,
tensor input_offset,
tensor output_size,
const padding& output_padding = padding())
: deconvolution(id, input, {weights}, stride, input_offset, output_size, output_padding, true) {}
/// @brief Constructs convolution_grad_input primitive (computes input paddings to match output size).
/// @param id This primitive id.
/// @param input Input primitive id.
/// @param weights List of primitive ids containing weights data.
/// @param input_offset Defines a shift, relative to (0,0) position of the input buffer,
/// where (0,0) point of the convolution_grad_input window should start calculations.
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
/// @param with_activation Enables Relu activation.
/// @param activation_slp Relu activation slope.
/// @param output_size User-defined output data size of the primitive (w/o padding).
/// @return convolution_grad_input primitive with specified settings.
static convolution_grad_input create_with_output_size(const primitive_id& id,
const primitive_id& input,
const std::vector<primitive_id>& weights,
tensor output_size,
tensor stride = {1, 1, 1, 1},
tensor input_offset = {0, 0, 0, 0},
const padding& output_padding = padding()) {
return convolution_grad_input(id, input, weights, stride, input_offset, output_size, output_padding);
}
};
/// @}
/// @}
/// @}
} // namespace cldnn

View File

@@ -1,217 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Performs backward convolution operation for weights and biases.
/// @details convolution_grad_weights updates weights and bias mutable data for training purposes.
/// @details Please note that this primitive was not heavily tested and currently only batch=1 is enabled for this primitive.
struct convolution_grad_weights
: public primitive_base<convolution_grad_weights> {
CLDNN_DECLARE_PRIMITIVE(convolution_grad_weights)
/// @brief Constructs convolution_grad_weights primitive.
/// @param id This primitive id.
/// @param input Input gradient primitive id.
/// @param input Input primitive id from convolution forward pass.
/// @param weights List of primitive ids containing weights data.
/// @param bias List of primitive ids containing bias data. Provide empty vector if using next parameters without bias.
/// @param input_offset Defines a shift, relative to (0,0) position of the input buffer,
/// where (0,0) point of the convolution_grad_weights window should start calculations.
/// @param dilation Defines dilation size.
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
/// @param conv_grad Id of primitive which uses weights and biases updated in this primitive.
/// This is for correct order of calculating. Leave empty if primitive is last in backward pass.
convolution_grad_weights(const primitive_id& id,
const primitive_id& input_grad,
const primitive_id& input,
const std::vector<primitive_id>& weights,
const std::vector<primitive_id>& bias,
tensor stride = {1, 1, 1, 1},
tensor input_offset = {0, 0, 0, 0},
tensor dilation = {1, 1, 1, 1},
const primitive_id& conv_grad = "",
const padding& output_padding = padding())
: primitive_base(id, {input_grad, input}, output_padding),
conv_grad(conv_grad),
stride(stride),
input_offset(input_offset),
dilation(dilation),
output_grad_w(false),
weights(weights),
bias(bias),
prev_weights_grad(std::vector<primitive_id>(0)),
prev_bias_grad(std::vector<primitive_id>(0)) {}
/// @brief Constructs convolution_grad_weights primitive (w/o bias).
/// @param id This primitive id.
/// @param input Input gradient primitive id.
/// @param input Input primitive id from convolution forward pass.
/// @param weights List of primitive ids containing weights data.
/// @param input_offset Defines a shift, relative to (0,0) position of the input buffer,
/// where (0,0) point of the convolution_grad_weights window should start calculations.
/// @param dilation Defines dilation size.
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
/// @param Should primitive give weights gradient (delta) as an output
/// @param conv_grad Id of primitive which uses weights and biases updated in this primitive.
/// This is for correct order of calculating. Leave empty if primitive is last in backward pass.
convolution_grad_weights(const primitive_id& id,
const primitive_id& input_grad,
const primitive_id& input,
const std::vector<primitive_id>& weights,
tensor stride = {1, 1, 1, 1},
tensor input_offset = {0, 0, 0, 0},
tensor dilation = {1, 1, 1, 1},
bool output_grad_w = false,
const primitive_id& conv_grad = "",
const padding& output_padding = padding())
: primitive_base(id, {input_grad, input}, output_padding),
conv_grad(conv_grad),
stride(stride),
input_offset(input_offset),
dilation(dilation),
output_grad_w(output_grad_w),
weights(weights),
bias(std::vector<primitive_id>(0)),
prev_weights_grad(std::vector<primitive_id>(0)),
prev_bias_grad(std::vector<primitive_id>(0)) {}
/// @brief Constructs convolution_grad_weights primitive (w/o bias).
/// @param id This primitive id.
/// @param input Input gradient primitive id.
/// @param input Input primitive id from convolution forward pass.
/// @param weights List of primitive ids containing weights data.
/// @param input_offset Defines a shift, relative to (0,0) position of the input buffer,
/// where (0,0) point of the convolution_grad_weights window should start calculations.
/// @param dilation Defines dilation size.
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
/// @param conv_grad Id of primitive which uses weights and biases updated in this primitive.
/// This is for correct order of calculating. Leave empty if primitive is last in backward pass.
convolution_grad_weights(const primitive_id& id,
const primitive_id& input_grad,
const primitive_id& input,
const std::vector<primitive_id>& weights,
tensor stride,
tensor input_offset,
tensor dilation,
const primitive_id& conv_grad = "",
const padding& output_padding = padding())
: primitive_base(id, {input_grad, input}, output_padding),
conv_grad(conv_grad),
stride(stride),
input_offset(input_offset),
dilation(dilation),
output_grad_w(false),
weights(weights),
bias(std::vector<primitive_id>(0)),
prev_weights_grad(std::vector<primitive_id>(0)),
prev_bias_grad(std::vector<primitive_id>(0)) {}
/// @brief Constructs convolution_grad_weights primitive with momentum optimizer.
/// @param id This primitive id.
/// @param input Input gradient primitive id.
/// @param input Input primitive id from convolution forward pass.
/// @param weights List of primitive ids containing weights data.
/// @param bias List of primitive ids containing bias data. Provide empty vector if using next parameters without bias.
/// @param prev_weights_grad List of primitive ids which contains weights gradient data calculated in previous iteration. Used in momentum optimizer.
/// @param prev_bias_grad List of primitive ids which contains bias gradient data calculated in previous iteration. Used in momentum optimizer.
/// @param input_offset Defines a shift, relative to (0,0) position of the input buffer,
/// where (0,0) point of the convolution_grad_weights window should start calculations.
/// @param dilation Defines dilation size.
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
/// @param conv_grad Id of primitive which uses weights and biases updated in this primitive.
/// This is for correct order of calculating. Leave empty if primitive is last in backward pass.
convolution_grad_weights(const primitive_id& id,
const primitive_id& input_grad,
const primitive_id& input,
const std::vector<primitive_id>& weights,
const std::vector<primitive_id>& bias,
const std::vector<primitive_id>& prev_weights_grad,
const std::vector<primitive_id>& prev_bias_grad,
tensor stride = {1, 1, 1, 1},
tensor input_offset = {0, 0, 0, 0},
tensor dilation = {1, 1, 1, 1},
const primitive_id& conv_grad = "",
const padding& output_padding = padding())
: primitive_base(id, {input_grad, input}, output_padding),
conv_grad(conv_grad),
stride(stride),
input_offset(input_offset),
dilation(dilation),
output_grad_w(false),
weights(weights),
bias(bias),
prev_weights_grad(prev_weights_grad),
prev_bias_grad(prev_bias_grad) {}
/// @brief Primitive id containing convolution gradient data.
primitive_id conv_grad;
/// @brief Defines shift in input buffer between adjacent calculations of output values.
tensor stride;
/// @brief Defines a shift, relative to (0,0) position of the input buffer,
/// where (0,0) point of the convolution_grad_weights window should start calculations.
tensor input_offset;
/// @brief Defines gaps in the input - dilation rate k=1 is normal convolution, k=2 means skipping one pixel per input, k=4 means skipping 3 pixels.
/// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1.
/// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4].
tensor dilation;
/// @brief Should primitive give weights gradient (delta) as an output
bool output_grad_w;
/// @brief List of primitive ids containing weights data.
const primitive_id_arr weights;
/// @brief List of primitive ids containing bias data.
const primitive_id_arr bias;
/// @brief Array of primitive ids containing weights gradient data calculated in previous iteration.
/// Amount of primitives and their memory sizes should be same as weights.
const primitive_id_arr prev_weights_grad;
/// @brief Array of primitive ids containing bias gradient data calculated in previous iteration.
/// Amount of primitives and their memory sizes should be same as biases.
const primitive_id_arr prev_bias_grad;
/// @brief On how many cards split the computation to.
int32_t split() const { return static_cast<int32_t>(weights.size()); }
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
std::vector<std::reference_wrapper<const primitive_id>> ret;
ret.reserve(weights.size() + bias.size() + !conv_grad.empty() + prev_weights_grad.size() +
prev_bias_grad.size());
for (auto& w : weights) ret.push_back(std::ref(w));
for (auto& b : bias) ret.push_back(std::ref(b));
for (auto& g : prev_weights_grad) ret.push_back(std::ref(g));
for (auto& g : prev_bias_grad) ret.push_back(std::ref(g));
if (!conv_grad.empty())
ret.push_back(conv_grad);
return ret;
}
};
/// @}
/// @}
/// @}
} // namespace cldnn
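The dilation comment above gives a one-dimensional example. A minimal standalone sketch of exactly that example, with illustrative weights (not clDNN API code):

// Standalone sketch of the dilation example in the comment above: a 1-D filter
// of size 3 applied at position 0 with dilation 1 and dilation 2.
#include <cstdio>

int main() {
    float x[] = {1, 2, 3, 4, 5};
    float w[] = {0.5f, 1.0f, -1.0f};
    float d1 = w[0] * x[0] + w[1] * x[1] + w[2] * x[2];  // dilation 1: adjacent inputs
    float d2 = w[0] * x[0] + w[1] * x[2] + w[2] * x[4];  // dilation 2: every other input
    std::printf("dilation 1: %f, dilation 2: %f\n", d1, d2);
    return 0;
}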

View File

@@ -56,8 +56,7 @@ struct deconvolution : public primitive_base<deconvolution> {
with_output_size(false),
groups(1),
weights(weights),
bias(bias),
_gradient(false) {}
bias(bias) {}
/// @brief Constructs deconvolution primitive.
/// @param id This primitive id.
/// @param input Input primitive id.
@@ -83,8 +82,7 @@ struct deconvolution : public primitive_base<deconvolution> {
with_output_size(false),
groups(groups),
weights(weights),
bias(bias),
_gradient(false) {}
bias(bias) {}
/// @brief Constructs deconvolution primitive (w/o bias).
/// @param id This primitive id.
@@ -100,16 +98,14 @@ struct deconvolution : public primitive_base<deconvolution> {
const std::vector<primitive_id>& weights,
tensor stride = {1, 1, 1, 1},
tensor input_offset = {0, 0, 0, 0},
const padding& output_padding = padding(),
bool gradient = false)
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
input_offset(input_offset),
stride(stride),
with_output_size(false),
groups(1),
weights(weights),
bias(std::vector<primitive_id>(0)),
_gradient(gradient) {}
bias(std::vector<primitive_id>(0)) {}
/// @brief Constructs deconvolution primitive (w/o bias).
/// @param id This primitive id.
@@ -127,16 +123,14 @@ struct deconvolution : public primitive_base<deconvolution> {
uint32_t groups,
tensor stride = {1, 1, 1, 1},
tensor input_offset = {0, 0, 0, 0},
const padding& output_padding = padding(),
bool gradient = false)
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
input_offset(input_offset),
stride(stride),
with_output_size(false),
groups(groups),
weights(weights),
bias(std::vector<primitive_id>(0)),
_gradient(gradient) {}
bias(std::vector<primitive_id>(0)) {}
/// @brief Constructs deconvolution primitive (computes input paddings to match output size).
/// @param id This primitive id.
@@ -164,8 +158,7 @@ struct deconvolution : public primitive_base<deconvolution> {
output_size(output_size),
groups(1),
weights(weights),
bias(bias),
_gradient(false) {}
bias(bias) {}
/// @brief Constructs deconvolution primitive (computes input paddings to match output size).
/// @param id This primitive id.
@@ -195,8 +188,7 @@ struct deconvolution : public primitive_base<deconvolution> {
output_size(output_size),
groups(groups),
weights(weights),
bias(bias),
_gradient(false) {}
bias(bias) {}
/// @brief Constructs deconvolution primitive (w/o bias, computes input paddings to match output size).
/// @param id This primitive id.
@@ -214,8 +206,7 @@ struct deconvolution : public primitive_base<deconvolution> {
tensor stride,
tensor input_offset,
tensor output_size,
const padding& output_padding = padding(),
bool gradient = false)
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
input_offset(input_offset),
stride(stride),
@@ -223,8 +214,7 @@ struct deconvolution : public primitive_base<deconvolution> {
output_size(output_size),
groups(1),
weights(weights),
bias(std::vector<primitive_id>(0)),
_gradient(gradient) {}
bias(std::vector<primitive_id>(0)) {}
/// @brief Constructs deconvolution primitive (computes input paddings to match output size).
/// @param id This primitive id.
@@ -300,12 +290,8 @@ struct deconvolution : public primitive_base<deconvolution> {
/// @brief On how many cards split the computation to.
int32_t split() const { return static_cast<int32_t>(weights.size()); }
/// @brief Indicates that deconvolution is used for convolution backward computation (convolution_grad_input)
bool gradient() const { return _gradient; }
protected:
bool _gradient;
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
std::vector<std::reference_wrapper<const primitive_id>> ret;
ret.reserve(weights.size() + bias.size());

View File

@@ -92,13 +92,9 @@ struct eltwise : public primitive_base<eltwise> {
eltwise_mode mode,
const padding& output_padding = padding())
: primitive_base(id, {input, input2}, output_padding),
output_calibration_factors(""),
output_quantization_factor(1.0f),
input_quantization_factors(0),
mode(mode),
coefficients(std::vector<float>(0)),
stride(std::vector<tensor>(0)),
inputs_calibration_factors(std::vector<primitive_id>(0)) {}
stride(std::vector<tensor>(0)) {}
/// @brief Constructs eltwise primitive.
/// @param id This primitive id.
@@ -115,13 +111,9 @@ struct eltwise : public primitive_base<eltwise> {
eltwise_mode mode,
const padding& output_padding = padding())
: primitive_base(id, {input, input2}, output_padding),
output_calibration_factors(""),
output_quantization_factor(1.0f),
input_quantization_factors(0),
mode(mode),
coefficients(std::vector<float>(0)),
stride(stride),
inputs_calibration_factors(std::vector<primitive_id>(0)) {}
stride(stride) {}
/// @brief Constructs eltwise primitive.
/// @param id This primitive id.
@@ -134,13 +126,9 @@ struct eltwise : public primitive_base<eltwise> {
data_types data_type,
const padding& output_padding = padding())
: primitive_base(id, inputs, output_padding, optional_data_type{data_type}),
output_calibration_factors(""),
output_quantization_factor(1.0f),
input_quantization_factors(0),
mode(mode),
coefficients(std::vector<float>(0)),
stride(std::vector<tensor>(0)),
inputs_calibration_factors(std::vector<primitive_id>(0)) {}
stride(std::vector<tensor>(0)) {}
/// @brief Constructs eltwise primitive.
/// @param id This primitive id.
@@ -151,13 +139,9 @@ struct eltwise : public primitive_base<eltwise> {
eltwise_mode mode,
const padding& output_padding = padding())
: primitive_base(id, inputs, output_padding),
output_calibration_factors(""),
output_quantization_factor(1.0f),
input_quantization_factors(0),
mode(mode),
coefficients(std::vector<float>(0)),
stride(std::vector<tensor>(0)),
inputs_calibration_factors(std::vector<primitive_id>(0)) {}
stride(std::vector<tensor>(0)) {}
/// @brief Constructs eltwise primitive.
/// @param id This primitive id.
@@ -171,13 +155,9 @@ struct eltwise : public primitive_base<eltwise> {
data_types data_type,
const padding& output_padding = padding())
: primitive_base(id, inputs, output_padding, optional_data_type{data_type}),
output_calibration_factors(""),
output_quantization_factor(1.0f),
input_quantization_factors(0),
mode(mode),
coefficients(coefficients),
stride(std::vector<tensor>(0)),
inputs_calibration_factors(std::vector<primitive_id>(0)) {
stride(std::vector<tensor>(0)) {
if (mode == eltwise_mode::sum && !coefficients.empty() && coefficients.size() != inputs.size()) {
throw std::invalid_argument("Invalid eltwise sum coefficients count (should be equal to 0 or input.size)");
}
@@ -186,31 +166,12 @@
}
}
/// @brief Primitive id containing output quanitization factors per output feature map.
primitive_id output_calibration_factors;
/// @brief Output quantization factor
float output_quantization_factor;
/// @brief List of quantization factors per input.
std::vector<float> input_quantization_factors;
/// @param mode Eltwise mode.
eltwise_mode mode;
/// @param coefficients Blob-wise coefficient for SUM operation.
std::vector<float> coefficients;
/// @brief Defines shift in input buffers between adjacent calculations of output values.
std::vector<tensor> stride;
/// @brief List of primitive ids containing input quantization factors per feature map, one primitive id for each input.
const primitive_id_arr inputs_calibration_factors;
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
std::vector<std::reference_wrapper<const primitive_id>> ret;
if (!output_calibration_factors.empty())
ret.push_back(output_calibration_factors);
for (auto& icf : inputs_calibration_factors) ret.push_back(std::ref(icf));
return ret;
}
};
/// @}
/// @}

View File

@@ -1,79 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief
/// @details Performs embedding upon input.
/// @n\b Example:
/// @n input_size = { 8, 1, 1, 75 };
/// @n weights_size = {15, 1, 62, 1 };
/// @n output_size = { 8, 75, 15, 1 };
/// @par Algorithm:
/// @par Where:
struct embed : public primitive_base<embed> {
CLDNN_DECLARE_PRIMITIVE(embed)
/// @brief Constructs embed primitive.
/// @param id This primitive id.
/// @param input Input primitive id.
/// @param weights Primitive id containing weights data.
/// @param bias Primitive id containing bias data.
embed(
const primitive_id& id,
const primitive_id& input,
const primitive_id& weights,
const primitive_id& bias)
: primitive_base(id, {input}), weights(weights), bias(bias) {}
/// @brief Constructs embed primitive.
/// @param id This primitive id.
/// @param input Input primitive id.
embed(
const primitive_id& id,
const primitive_id& input,
const primitive_id& weights)
: primitive_base(id, {input}), weights(weights), bias("") {}
/// @brief Primitive id containing weights data.
primitive_id weights;
/// @brief Primitive id containing bias data.
primitive_id bias;
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
if (bias.empty())
return {weights};
else
return {weights, bias};
}
};
/// @}
/// @}
/// @}
} // namespace cldnn
#pragma once
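The removed embed primitive only sketches its shapes above. A minimal standalone sketch assuming the usual lookup-table semantics, where each integer index in the input selects a row of the weights and the optional bias is added; that reading is an assumption rather than something the header states:

// Standalone sketch of a lookup-table embedding with an added bias (assumed semantics).
#include <cstdio>
#include <vector>

int main() {
    const int vocab = 4, dim = 3;
    std::vector<float> weights(vocab * dim);
    for (size_t i = 0; i < weights.size(); ++i) weights[i] = static_cast<float>(i);
    std::vector<float> bias(dim, 0.1f);
    std::vector<int> input = {2, 0, 3};  // token indices

    for (int idx : input) {
        std::printf("index %d -> ", idx);
        for (int d = 0; d < dim; ++d)
            std::printf("%f ", weights[idx * dim + d] + bias[d]);
        std::printf("\n");
    }
    return 0;
}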

View File

@@ -1,59 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Performs backward fully connected layer (inner product) for input.
struct fully_connected_grad_input : public primitive_base<fully_connected_grad_input> {
CLDNN_DECLARE_PRIMITIVE(fully_connected_grad_input)
/// @brief Constructs fully connected layer grad for input.
/// @param id This primitive id.
/// @param input_grad Input gradient primitive id.
/// @param input Input primitive id.
/// @param weights Primitive id containing weights data.
fully_connected_grad_input(
const primitive_id& id,
const primitive_id& input_grad,
const primitive_id& input,
const primitive_id& weights,
const padding& output_padding = padding())
: primitive_base(id, {input_grad, input}, output_padding), weights(weights) {
}
/// @brief Primitive id containing weights data.
primitive_id weights;
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
return {weights};
}
};
/// @}
/// @}
/// @}
} // namespace cldnn

View File

@@ -1,115 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Performs backward fully connected layer (inner product) for weights and biases.
struct fully_connected_grad_weights
: public primitive_base<fully_connected_grad_weights> {
CLDNN_DECLARE_PRIMITIVE(fully_connected_grad_weights)
/// @brief Constructs fully connected layer for weights and biases.
/// @param id This primitive id.
/// @param input Input gradient primitive id.
/// @param input Input primitive id.
/// @param weights Primitive id containing weights data.
/// @param bias Primitive id containing bias data. Provide empty string if using Relu without bias.
/// @param fc_grad Id of primitive which uses weights and biases updated in this primitive.
/// This is for correct order of calculating. Leave empty if primitive is last in backward pass.
fully_connected_grad_weights(const primitive_id& id,
const primitive_id& input_grad,
const primitive_id& input,
const primitive_id& weights,
const primitive_id& bias = "",
const primitive_id& fc_grad = "",
const padding& output_padding = padding())
: primitive_base(id, {input_grad, input}, output_padding),
weights(weights),
bias(bias),
fc_grad(fc_grad),
prev_weights_grad(""),
prev_bias_grad("") {}
/// @brief Constructs fully connected layer for weights and biases with momentum optimizer.
/// @param id This primitive id.
/// @param input Input gradient primitive id.
/// @param input Input primitive id.
/// @param weights Primitive id containing weights data.
/// @param bias Primitive id containing bias data. Provide empty string if using Relu without bias.
/// @param prev_weights_grad Id of primitive which contains weights gradient data calculated in previous iteration. Used in momentum optimizer.
/// @param prev_bias_grad Id of primitive which contains bias gradient data calculated in previous iteration. Used in momentum optimizer.
/// @param fc_grad Id of primitive which uses weights and biases updated in this primitive. This is for correct order of calculating.
fully_connected_grad_weights(const primitive_id& id,
const primitive_id& input_grad,
const primitive_id& input,
const primitive_id& weights,
const primitive_id& bias,
const primitive_id& prev_weights_grad,
const primitive_id& prev_bias_grad,
const primitive_id& fc_grad = "",
const padding& output_padding = padding())
: primitive_base(id, {input_grad, input}, output_padding),
weights(weights),
bias(bias),
fc_grad(fc_grad),
prev_weights_grad(prev_weights_grad),
prev_bias_grad(prev_bias_grad) {}
/// @brief Primitive id containing weights data.
primitive_id weights;
/// @brief Primitive id containing bias data.
primitive_id bias;
/// @brief Primitive id containing fully connected gradient data.
primitive_id fc_grad;
/// @brief Id of primitive containing weights gradient data calculated in previous iteration. It's memory size should be same as weights.
primitive_id prev_weights_grad;
/// @brief Id of primitive containing bias gradient data calculated in previous iteration. It's memory size should be same as biases.
primitive_id prev_bias_grad;
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
std::vector<std::reference_wrapper<const primitive_id>> ret;
ret.reserve(1 + !bias.empty() + !fc_grad.empty() + !prev_weights_grad.empty() + !prev_bias_grad.empty());
ret.push_back(weights);
if (!bias.empty())
ret.push_back(bias);
if (!prev_weights_grad.empty())
ret.push_back(prev_weights_grad);
if (!prev_bias_grad.empty())
ret.push_back(prev_bias_grad);
if (!fc_grad.empty())
ret.push_back(fc_grad);
return ret;
}
};
/// @}
/// @}
/// @}
} // namespace cldnn

View File

@ -1,109 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>
namespace cldnn {
/// @brief Axis which index_select primitive will index.
enum class index_select_axis_name {
along_b,
along_f,
along_y,
along_x
};
/// @brief Selects indices which will be copied to the output.
///
/// @details Applies index selection along the specified dimension. The indices which will be copied
/// are specified by @c indices.
/// @n
/// @n Example:
/// @n <tt>input_sizes = (1, 2, 4, 2)</tt>
/// @n <tt>input_values = (a, b, c, d)</tt>
/// @n <tt> (e, f, g, h)</tt>
/// @n <tt>indices_sizes = (1, 1, 6, 1)</tt>
/// @n <tt>indices_values = {0, 0, 1, 1, 3, 3}</tt>
/// @n For axis: along_x:
/// @n <tt>output_sizes = (1, 2, 6, 2)</tt>
/// @n <tt>output_values = (a, a, b, b, d, d)</tt>
/// @n <tt> (e, e, f, f, h, h)</tt>
/// @n
/// @n The resulting output will have sizes equal to input_size, with the size along the selected axis changed to the x size of @c indices.
/// @n
/// @n@b Requirements:
/// @n - @c input must be a valid primitive_id whose output format is bfyx/yxfb;
/// @n - @c indices must be a valid primitive_id whose output layout is (bfyx/yxfb, i32, {1, 1, indices_size, 1});
/// @n - @c axis must be a valid index_select_axis_name instance.
/// @n Breaking any of these conditions will cause an exception to be thrown.
struct index_select : public primitive_base<index_select> {
CLDNN_DECLARE_PRIMITIVE(index_select)
/// @brief Constructs index_select primitive / layer.
///
/// @param id An identifier of new primitive.
/// @param input An identifier of primitive, which is an input for newly created
/// index_select primitive.
/// @param indices An identifier of a primitive which holds indices in memory distributed along x.
/// @param axis Axis of index selecting.
/// @param output_padding Optional padding for output from primitive.
index_select(
const primitive_id& id,
const primitive_id& input,
const primitive_id& indices,
index_select_axis_name axis = index_select_axis_name::along_b,
const padding& output_padding = padding())
: primitive_base(id, {input, indices}, output_padding), axis({axis}), reverse(false) {}
/// @brief Constructs index_select primitive / layer.
///
/// @param id An identifier of new primitive.
/// @param input An identifier of primitive, which is an input for newly created
/// index_select primitive.
/// @param axis Axis of index selecting.
/// @param output_padding Optional padding for output from primitive.
index_select(
const primitive_id& id,
const primitive_id& input,
index_select_axis_name axis = index_select_axis_name::along_b,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding), axis({axis}), reverse(true) {}
/// @brief Constructs index_select primitive / layer.
///
/// @param id An identifier of new primitive.
/// @param input An identifier of primitive, which is an input for newly created
/// index_select primitive.
/// @param axis Vector of axes of index selecting.
/// @param output_padding Optional padding for output from primitive.
index_select(
const primitive_id& id,
const primitive_id& input,
const std::vector<index_select_axis_name>& axis = {index_select_axis_name::along_b},
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding), axis(axis), reverse(true) {}
/// @brief A list of axes of index selecting
std::vector<index_select_axis_name> axis;
/// @brief Do index_select in reverse order on axis/axes.
bool reverse;
};
/// @}
/// @}
/// @}
} // namespace cldnn
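A similar sketch for the removed index_select primitive (same topology assumptions; all ids hypothetical):

// Copy the x-entries of "conv1" selected by the i32 indices held in "gather_idx".
cldnn::index_select sel(
    "select_x",                               // this primitive id
    "conv1",                                  // input primitive id
    "gather_idx",                             // indices primitive id
    cldnn::index_select_axis_name::along_x);  // axis to select along
topology.add(sel);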

View File

@ -1,58 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Returns the values from data that the given indices point at.
struct lookup_table : public primitive_base<lookup_table> {
CLDNN_DECLARE_PRIMITIVE(lookup_table)
/// @brief Enum type to specify the axis to return values along.
enum axis_name { batch, feature, x, y, xyf };
/// @brief Constructs lookup_table primitive.
/// @param id This primitive id.
/// @param input_data Input data primitive id.
/// @param input_indices Input indices primitive id.
/// @param axis Axis to return values from.
lookup_table(const primitive_id& id,
const primitive_id& input_data,
const primitive_id& input_indices,
axis_name axis = axis_name::xyf,
const padding& output_padding = padding())
: primitive_base(id, {input_data, input_indices}, output_padding),
axis(axis),
with_axis(axis == axis_name::xyf ? false : true) {}
/// @brief Axis to return values from. If not set, returns the data that each index points at within the flattened x, y, f dimensions for each batch.
axis_name axis;
/// @brief Indicates that the primitive has user defined axis to return values from.
bool with_axis;
};
/// @}
/// @}
/// @}
} // namespace cldnn
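A sketch for the removed lookup_table primitive (same topology assumptions; all ids hypothetical):

// For each batch, return the values pointed at by the indices in "top_idx" along the feature axis.
cldnn::lookup_table lut(
    "lookup_f",                                // this primitive id
    "pool1",                                   // data primitive id
    "top_idx",                                 // indices primitive id
    cldnn::lookup_table::axis_name::feature);  // axis to return values from
topology.add(lut);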

View File

@ -113,12 +113,6 @@ struct network {
/// @brief Provides user-supplied @ref memory for output primitives defined by user in source @ref topology. /// @brief Provides user-supplied @ref memory for output primitives defined by user in source @ref topology.
void set_output_memory(const primitive_id& id, const memory& mem) const; void set_output_memory(const primitive_id& id, const memory& mem) const;
/// @brief Sets learning rate for training primitives.
void set_learning_rate(const float lr);
/// @brief Return learning rate.
float get_learning_rate();
/// @brief Return stream id. /// @brief Return stream id.
uint16_t get_stream_id(); uint16_t get_stream_id();

View File

@ -1,51 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Performs the backward pass of the scale primitive with respect to its input.
struct scale_grad_input : public primitive_base<scale_grad_input> {
CLDNN_DECLARE_PRIMITIVE(scale_grad_input)
/// @brief Constructs scale_grad_input.
/// @param id This primitive id.
/// @param input Input primitive id.
/// @param scale_input Scale input primitive id with values needed for product computation.
scale_grad_input(const primitive_id& id,
const primitive_id& input,
const primitive_id& scale_input, // should be bfyx or yxfb, where each dimension can be 1, if all
// dimensions are 1 then this is scalar
const padding& output_padding = padding())
: primitive_base(id, {input, scale_input}, output_padding) {}
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override { return {}; }
};
/// @}
/// @}
/// @}
} // namespace cldnn
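A sketch for the removed scale_grad_input primitive (same topology assumptions; all ids hypothetical):

// Propagate the incoming gradient of a scale layer back to its input.
cldnn::scale_grad_input sgi(
    "scale1_grad_input",  // this primitive id
    "scale1_out_grad",    // gradient arriving from the next layer
    "scale1_scale");      // forward scale values, bfyx/yxfb
topology.add(sgi);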

View File

@ -1,131 +0,0 @@
/*
// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Performs the backward pass of the scale layer for scale_input and biases.
struct scale_grad_weights : public primitive_base<scale_grad_weights> {
CLDNN_DECLARE_PRIMITIVE(scale_grad_weights)
/// @brief Constructs scale_grad_weights primitive without bias.
/// @param id This primitive id.
/// @param input Input primitive id. Same as input for scale forward.
/// @param input_grad Input gradient primitive id.
/// @param scale_input Scale input primitive id.
/// @param scale_grad Id of primitive which uses weights and biases updated in this primitive. This is needed to enforce the correct calculation order.
scale_grad_weights(const primitive_id& id,
const primitive_id& input,
const primitive_id& input_grad,
const primitive_id& scale_input, // should be one number per feature
const primitive_id& scale_grad = "", // leave empty if this is last primitive in backward pass
const padding& output_padding = padding())
: primitive_base(id, {input, input_grad}, output_padding),
scale_input(scale_input),
bias(""),
prev_scale_grad(""),
prev_bias_grad(""),
scale_grad(scale_grad) {}
/// @brief Constructs scale_grad_weights primitive with optional adding bias.
/// @param id This primitive id.
/// @param input Input primitive id. Same as input for scale forward.
/// @param input_grad Input gradient primitive id.
/// @param scale_input Scale input primitive id.
/// @param bias Primitive id containing bias data.
/// @param scale_grad Id of primitive which uses weights and biases updated in this primitive. This is needed to enforce the correct calculation order.
scale_grad_weights(const primitive_id& id,
const primitive_id& input,
const primitive_id& input_grad,
const primitive_id& scale_input, // should be one number per feature
const primitive_id& bias, // should be same size as scale_input
const primitive_id& scale_grad = "", // leave empty if this is last primitive in backward pass
const padding& output_padding = padding())
: primitive_base(id, {input, input_grad}, output_padding),
scale_input(scale_input),
bias(bias),
prev_scale_grad(""),
prev_bias_grad(""),
scale_grad(scale_grad) {}
/// @brief Constructs scale_grad_weights primitive with optional bias and momentum optimizer.
/// @param id This primitive id.
/// @param input Input primitive id. Same as input for scale forward.
/// @param input_grad Input gradient primitive id.
/// @param scale_input Scale input primitive id.
/// @param bias Primitive id containing bias data.
/// @param prev_scale_grad Id of primitive which contains scale gradient data calculated in previous iteration. Used in momentum optimizer.
/// @param prev_bias_grad Id of primitive which contains bias gradient data calculated in previous iteration. Used in momentum optimizer.
/// @param scale_grad Id of primitive which uses weights and biases updated in this primitive. This is needed to enforce the correct calculation order.
scale_grad_weights(const primitive_id& id,
const primitive_id& input,
const primitive_id& input_grad,
const primitive_id& scale_input, // should be one number per feature
const primitive_id& bias, // should be same size as scale_input
const primitive_id& prev_scale_grad,
const primitive_id& prev_bias_grad, // leave empty if bias not specified
const primitive_id& scale_grad = "", // leave empty if this is last primitive in backward pass
const padding& output_padding = padding())
: primitive_base(id, {input, input_grad}, output_padding),
scale_input(scale_input),
bias(bias),
prev_scale_grad(prev_scale_grad),
prev_bias_grad(prev_bias_grad),
scale_grad(scale_grad) {}
/// @brief Scale input primitive id.
primitive_id scale_input;
/// @brief Primitive id containing bias data.
primitive_id bias;
/// @brief Primitive id containing scale gradient data calculated in previous iteration.
primitive_id prev_scale_grad;
/// @brief Primitive id containing bias gradient data calculated in previous iteration.
primitive_id prev_bias_grad;
/// @brief Primitive id which uses weights and biases updated in this primitive.
primitive_id scale_grad;
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
std::vector<std::reference_wrapper<const primitive_id>> ret;
ret.reserve(1 + !bias.empty() + !prev_scale_grad.empty() + !prev_bias_grad.empty());
ret.push_back(scale_input);
if (!bias.empty())
ret.push_back(bias);
if (!prev_scale_grad.empty())
ret.push_back(prev_scale_grad);
if (!prev_bias_grad.empty())
ret.push_back(prev_bias_grad);
if (!scale_grad.empty())
ret.push_back(scale_grad);
return ret;
}
};
/// @}
/// @}
/// @}
} // namespace cldnn
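A sketch for the removed scale_grad_weights primitive using its bias-less constructor (same topology assumptions; all ids hypothetical):

// Compute gradients for the per-feature scale values of a scale layer.
cldnn::scale_grad_weights sgw(
    "scale1_grad_weights",  // this primitive id
    "scale1_input",         // forward-pass input of the scale layer
    "scale1_out_grad",      // incoming gradient
    "scale1_scale");        // per-feature scale values to update
topology.add(sgw);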

View File

@ -1,47 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Backward pass for Softmax log loss.
/// @details The output values are the same as input_prob, except that the value at the position given by the label is decreased by 1.
struct softmax_loss_grad : public primitive_base<softmax_loss_grad> {
CLDNN_DECLARE_PRIMITIVE(softmax_loss_grad)
/// @brief Constructs softmax_loss_grad primitive.
/// @param id This primitive id.
/// @param input_prob Input primitive id.
/// @param labels Labels primitive id.
softmax_loss_grad(const primitive_id& id,
const primitive_id& input_prob,
const primitive_id& labels,
const padding& output_padding = padding())
: primitive_base(id, {input_prob, labels}, output_padding) {}
};
/// @}
/// @}
/// @}
} // namespace cldnn
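The removed primitive implements the usual softmax log-loss gradient, grad_i = p_i - [i == label]. A sketch under the same topology assumptions (all ids hypothetical):

cldnn::softmax_loss_grad sml(
    "loss_grad",      // this primitive id
    "softmax_prob",   // probabilities produced by a forward softmax
    "labels");        // ground-truth label indices
topology.add(sml);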

View File

@ -1,115 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "api/primitive.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Primitive that fuses convolution, batch norm, scale, and optionally ReLU.
struct fused_conv_bn_scale : public primitive_base<fused_conv_bn_scale> {
CLDNN_DECLARE_PRIMITIVE(fused_conv_bn_scale)
/// @brief Constructs convolution primitive fused with batch norm and scale.
/// @param id This primitive id.
/// @param input Input primitive id.
/// @param weights List of primitive ids containing weights data.
/// @param bias List of primitive ids containing bias data.
/// @param epsilon Small number to protect against division by zero.
/// @param scale_input Scale input primitive id with values needed for product computation. Used in fused scale part.
/// @param scale_bias Primitive id containing bias data for fused scale part.
/// @param input_offset Defines a shift, relative to (0,0) position of the input buffer,
/// where (0,0) point of the convolution window should start calculations.
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
/// @param inv_variance Primitive id containing inverted variance calculated in this primitive. Used in fused batch norm part.
/// @param with_activation Enable Relu activation.
/// @param activation_slp Relu activation slope.
fused_conv_bn_scale(const primitive_id& id,
const primitive_id& input,
const std::vector<primitive_id>& weights,
const std::vector<primitive_id>& bias,
float epsilon,
const primitive_id& scale_input,
const primitive_id& scale_bias = "",
tensor stride = {1, 1, 1, 1},
tensor dilation = {1, 1, 1, 1},
tensor input_offset = {0, 0, 0, 0},
const primitive_id& inv_variance = "",
const padding& output_padding = padding())
: primitive_base(id, {input, scale_input}, output_padding),
input_offset(input_offset),
stride(stride),
dilation(dilation),
with_output_size(false),
scale_bias(scale_bias),
inv_variance(inv_variance),
epsilon(epsilon),
weights(weights),
bias(bias) {
if ((bias.size() != 0) && (weights.size() != bias.size()))
throw std::runtime_error("convolution's weights/bias count does not match");
}
/// @brief Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution window should start calculations.
tensor input_offset;
/// @brief Defines shift in input buffer between adjacent calculations of output values.
tensor stride;
/// @brief Defines gaps in the input - dilation rate k=1 is normal convolution, k=2 means skipping one pixel per input, k=4 means skipping 3 pixels.
/// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1.
/// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4].
tensor dilation;
/// @brief Indicates that the primitive has user-defined output size (non-zero value).
bool with_output_size;
/// @brief User-defined output data size of the primitive (w/o padding).
tensor output_size;
/// @brief Primitive id containing scale bias data for fused convolution.
primitive_id scale_bias;
/// @brief Primitive id containing inverted variance used in future gradient computing for fused convolution.
primitive_id inv_variance;
/// @brief Epsilon for fused convolution.
float epsilon;
/// @brief Defines into how many parts the computation is split.
int32_t split() const { return static_cast<int32_t>(weights.size()); }
/// @brief List of primitive ids containing weights data.
const primitive_id_arr weights;
/// @brief List of primitive ids containing bias data.
const primitive_id_arr bias;
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
std::vector<std::reference_wrapper<const primitive_id>> ret;
ret.reserve(weights.size() + bias.size() + !scale_bias.empty() + !inv_variance.empty());
for (auto& w : weights) ret.push_back(std::ref(w));
for (auto& b : bias) ret.push_back(std::ref(b));
if (!scale_bias.empty())
ret.push_back(scale_bias);
if (!inv_variance.empty())
ret.push_back(inv_variance);
return ret;
}
};
/// @}
/// @}
/// @}
} // namespace cldnn
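A sketch for the removed fused_conv_bn_scale primitive using the constructor shown above; stride, dilation and input_offset keep their defaults (same topology assumptions; all ids hypothetical):

// Fuse a convolution with a batch-norm-derived scale/shift.
cldnn::fused_conv_bn_scale fused(
    "conv_bn_scale1",       // this primitive id
    "input_reorder",        // input primitive id
    { "conv1_weights" },    // weights
    { "conv1_bias" },       // bias
    1e-5f,                  // batch-norm epsilon
    "bn1_scale",            // fused scale values
    "bn1_shift");           // fused scale bias
topology.add(fused);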

View File

@ -37,9 +37,6 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise> {
/// @param input Input primitive id. /// @param input Input primitive id.
/// @param weights List of primitive ids containing weights data. /// @param weights List of primitive ids containing weights data.
/// @param bias List of primitive ids containing bias data. /// @param bias List of primitive ids containing bias data.
/// @param w_quantization_factor List of primitive ids containing weights quantization factors per output feature map.
/// @param output_calibration_factors List of primitive ids output containing calibration factors per output feature map.
/// @param i_quantization_factor Input quantization factor
/// @param input_offset Defines a shift, relative to (0,0) position of the input buffer, /// @param input_offset Defines a shift, relative to (0,0) position of the input buffer,
/// where (0,0) point of the convolution window should start calculations. /// where (0,0) point of the convolution window should start calculations.
/// @param stride Defines shift in input buffer between adjacent calculations of output values. /// @param stride Defines shift in input buffer between adjacent calculations of output values.
@ -57,11 +54,6 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise> {
eltwise_mode mode, eltwise_mode mode,
const std::vector<primitive_id>& weights, const std::vector<primitive_id>& weights,
const std::vector<primitive_id>& bias, const std::vector<primitive_id>& bias,
const std::vector<primitive_id>& conv_w_quantization_factor,
const std::vector<primitive_id>& conv_output_calibration_factors,
const float conv_i_quantization_factor,
const float non_conv_scale,
const primitive_id& eltw_output_calibration_factors,
const std::vector<tensor>& eltw_stride, const std::vector<tensor>& eltw_stride,
tensor stride = {1, 1, 1, 1}, tensor stride = {1, 1, 1, 1},
tensor input_offset = {0, 0, 0, 0}, tensor input_offset = {0, 0, 0, 0},
@ -74,18 +66,10 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise> {
optional_data_type output_data_type = {}) optional_data_type output_data_type = {})
: primitive_base(id, {input, input2}, output_padding, output_data_type), : primitive_base(id, {input, input2}, output_padding, output_data_type),
conv((primitive_id_arr)weights, conv((primitive_id_arr)weights,
(primitive_id_arr)bias, (primitive_id_arr)bias),
(primitive_id_arr)conv_w_quantization_factor, eltw(),
(primitive_id_arr)conv_output_calibration_factors),
eltw(eltw_output_calibration_factors),
non_conv_scale(non_conv_scale),
conv_weights(weights), conv_weights(weights),
conv_bias(bias), conv_bias(bias) {
conv_weights_quantization_factors(conv_w_quantization_factor),
conv_output_calibration_factors(conv_output_calibration_factors) {
conv.input_quantization_factor = conv_i_quantization_factor;
conv.output_quantization_factor = 1.0f;
conv.input_offset = input_offset; conv.input_offset = input_offset;
conv.stride = stride; conv.stride = stride;
conv.dilation = dilation; conv.dilation = dilation;
@ -100,10 +84,6 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise> {
if ((bias.size() != 0) && (weights.size() != bias.size())) if ((bias.size() != 0) && (weights.size() != bias.size()))
throw std::runtime_error("convolution's weights/bias count does not match"); throw std::runtime_error("convolution's weights/bias count does not match");
if (conv.output_calibration_factors.size()) {
if ((weights.size() != 0) && (weights.size() != conv.weights_quantization_factors.size()))
throw std::runtime_error("convolution's weights count does not match quantization factors count");
}
} }
struct conv_data { struct conv_data {
@ -111,14 +91,6 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise> {
const primitive_id_arr weights; const primitive_id_arr weights;
/// @brief List of primitive ids containing bias data. /// @brief List of primitive ids containing bias data.
const primitive_id_arr bias; const primitive_id_arr bias;
/// @brief List of primitive ids containing weights quantization factors per output feature map.
const primitive_id_arr weights_quantization_factors;
/// @brief List of primitive ids containing output quantization factors per output feature map for convolution.
const primitive_id_arr output_calibration_factors;
/// @brief Input quantization factor for convolution
float input_quantization_factor;
/// @brief Output quantization factor for convolution
float output_quantization_factor;
/// @brief Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution window should start calculations. /// @brief Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution window should start calculations.
tensor input_offset; tensor input_offset;
/// @brief Defines shift in input buffer between adjacent calculations of output values. /// @brief Defines shift in input buffer between adjacent calculations of output values.
@ -137,20 +109,12 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise> {
tensor output_size; tensor output_size;
conv_data(const primitive_id_arr& weights, conv_data(const primitive_id_arr& weights,
const primitive_id_arr& bias, const primitive_id_arr& bias)
const primitive_id_arr& weights_quantization_factors,
const primitive_id_arr& output_calibration_factors)
: weights(weights), : weights(weights),
bias(bias), bias(bias) {}
weights_quantization_factors(weights_quantization_factors),
output_calibration_factors(output_calibration_factors) {}
} conv; } conv;
struct eltw_data { struct eltw_data {
/// @brief Primitive id containing output quantization factors per output feature map.
primitive_id output_calibration_factors;
/// @brief Output quantization factor for eltwise
float output_quantization_factor;
/// @param mode Eltwise mode. /// @param mode Eltwise mode.
eltwise_mode mode; eltwise_mode mode;
/// @brief Enable Relu activation. /// @brief Enable Relu activation.
@ -159,22 +123,11 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise> {
float activation_negative_slope; float activation_negative_slope;
/// @brief Defines shift in input buffers between adjacent calculations of output values. /// @brief Defines shift in input buffers between adjacent calculations of output values.
std::vector<tensor> stride; std::vector<tensor> stride;
explicit eltw_data(const primitive_id& output_calibration_factors)
: output_calibration_factors(output_calibration_factors) {}
} eltw; } eltw;
/// @brief On how many cards split the computation to. /// @brief On how many cards split the computation to.
int32_t split() const { return static_cast<int32_t>(conv.weights.size()); } int32_t split() const { return static_cast<int32_t>(conv.weights.size()); }
// FIXME: In fact, that should be needed for any EltWise primitive, not
// only the fused one. What's more important, these scales should be
// separate for different inputs and probably per-channel, not per
// primitive.
//
// I'm only needing a scalar for my particular task, so let's hack like
// this in the meantime. The final design is still to be investigated.
float non_conv_scale = 1.0f;
/// @brief Is optimization that output contains data from second input ON ? /// @brief Is optimization that output contains data from second input ON ?
bool second_input_in_output = false; bool second_input_in_output = false;
bool depth_to_space_already_fused = false; bool depth_to_space_already_fused = false;
@ -182,21 +135,13 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise> {
protected: protected:
const primitive_id_arr conv_weights; const primitive_id_arr conv_weights;
const primitive_id_arr conv_bias; const primitive_id_arr conv_bias;
const primitive_id_arr conv_weights_quantization_factors;
const primitive_id_arr conv_output_calibration_factors;
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override { std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
std::vector<std::reference_wrapper<const primitive_id>> ret; std::vector<std::reference_wrapper<const primitive_id>> ret;
ret.reserve(conv.weights.size() + conv.bias.size() + conv.weights_quantization_factors.size() + ret.reserve(conv.weights.size() + conv.bias.size());
conv.output_calibration_factors.size() + (eltw.output_calibration_factors.empty() ? 0 : 1));
for (auto& w : conv.weights) ret.push_back(std::ref(w)); for (auto& w : conv.weights) ret.push_back(std::ref(w));
for (auto& b : conv.bias) ret.push_back(std::ref(b)); for (auto& b : conv.bias) ret.push_back(std::ref(b));
for (auto& q : conv.weights_quantization_factors) ret.push_back(std::ref(q));
for (auto& q : conv.output_calibration_factors) ret.push_back(std::ref(q));
if (!eltw.output_calibration_factors.empty())
ret.push_back(eltw.output_calibration_factors);
return ret; return ret;
} }

View File

@ -67,7 +67,6 @@ inline uint8_t GetActivationAdditionalParamsNumber(ActivationFunction func) {
break; break;
case ActivationFunction::RELU_NEGATIVE_SLOPE: case ActivationFunction::RELU_NEGATIVE_SLOPE:
case ActivationFunction::ELU: case ActivationFunction::ELU:
case ActivationFunction::RELU_NEGATIVE_SLOPE_GRAD:
paramsNum = 1; paramsNum = 1;
break; break;
default: default:

View File

@ -25,8 +25,6 @@ enum class KernelType {
UNKNOWN, UNKNOWN,
ARG_MAX_MIN, ARG_MAX_MIN,
AVERAGE_UNPOOLING, AVERAGE_UNPOOLING,
BATCH_NORM_GRAD,
LOOKUP_TABLE,
CONVOLUTION, CONVOLUTION,
DECONVOLUTION, DECONVOLUTION,
LRN, LRN,
@ -38,9 +36,7 @@ enum class KernelType {
SOFT_MAX, SOFT_MAX,
ELTWISE, ELTWISE,
SCALE, SCALE,
FUSED_CONV_BN_SCALE,
FUSED_CONV_ELTWISE, FUSED_CONV_ELTWISE,
TABLE_LOOKUP,
REORDER, REORDER,
RESHAPE, RESHAPE,
PERMUTE, PERMUTE,
@ -49,21 +45,14 @@ enum class KernelType {
REGION_YOLO, REGION_YOLO,
REORG_YOLO, REORG_YOLO,
MAX_UNPOOLING, MAX_UNPOOLING,
CONVOLUTION_GRAD_WEIGHTS,
SCALE_GRAD_WEIGHTS,
MVN, MVN,
FULLY_CONNECTED_GRAD_INPUT,
FULLY_CONNECTED_GRAD_WEIGHTS,
LSTM_GEMM, LSTM_GEMM,
LSTM_ELT, LSTM_ELT,
EMBED,
SOFT_MAX_LOSS_GRAD,
BORDER, BORDER,
TILE, TILE,
SELECT, SELECT,
BROADCAST, BROADCAST,
GEMM, GEMM,
INDEX_SELECT,
PYRAMID_ROI_ALIGN, PYRAMID_ROI_ALIGN,
CONTRACT, CONTRACT,
ONE_HOT, ONE_HOT,
@ -133,8 +122,6 @@ enum class ActivationFunction {
SQRT, SQRT,
LINEAR, LINEAR,
ELU, ELU,
RELU_GRAD,
RELU_NEGATIVE_SLOPE_GRAD,
SIN, SIN,
ASIN, ASIN,
SINH, SINH,
@ -155,7 +142,6 @@ enum class ActivationFunction {
NEGATIVE, NEGATIVE,
NOT, NOT,
POW, POW,
NONE_GRAD,
ERF, ERF,
HARD_SIGMOID, HARD_SIGMOID,
RECIPROCAL, RECIPROCAL,

View File

@ -103,9 +103,6 @@ KernelsData ActivationKernelBase::GetCommonKernelsData(const Params& params, con
FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point,
DEFAULT, false, false, 1, GetFusedPrimitiveInputsCount(params)); DEFAULT, false, false, 1, GetFusedPrimitiveInputsCount(params));
if (newParams.gradient)
kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});
if (!newParams.inputActivationParams.empty()) { if (!newParams.inputActivationParams.empty()) {
kernel.arguments.push_back({ArgumentDescriptor::Types::SLOPE, 0}); kernel.arguments.push_back({ArgumentDescriptor::Types::SLOPE, 0});
} }

View File

@ -34,7 +34,6 @@ ParamsKey ActivationKernelOpt::GetSupportedKey() const {
k.EnableAllOutputLayout(); k.EnableAllOutputLayout();
k.EnableTensorOffset(); k.EnableTensorOffset();
k.EnableBatching(); k.EnableBatching();
k.EnableGradient();
return k; return k;
} }

View File

@ -38,7 +38,6 @@ ParamsKey ActivationKernelRef::GetSupportedKey() const {
k.EnableTensorOffset(); k.EnableTensorOffset();
k.EnableTensorPitches(); k.EnableTensorPitches();
k.EnableBatching(); k.EnableBatching();
k.EnableGradient();
return k; return k;
} }

View File

@ -1,88 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "batch_norm_kernel_base.h"
#include <algorithm>
namespace kernel_selector {
bool BatchNormKernelBase::Validate(const Params& p, const optional_params& o) const {
if (p.GetType() != KernelType::BATCH_NORM_GRAD || o.GetType() != KernelType::BATCH_NORM_GRAD) {
return false;
}
return true;
}
JitConstants BatchNormKernelBase::GetJitConstants(const batch_norm_params& params) const {
JitConstants jit = MakeBaseParamsJitConstants(params);
jit.AddConstant(MakeJitConstant("EPSILON", params.batchNormParams.epsilon));
if (params.batchNormParams.with_inv_var)
jit.AddConstant(MakeJitConstant("FORWARD", 1));
if (params.batchNormParams.with_scale_shift)
jit.AddConstant(MakeJitConstant("SCALE_SHIFT", 1));
if (params.batchNormParams.with_mean_var_out)
jit.AddConstant(MakeJitConstant("MEAN_VAR_OUT", 1));
return jit;
}
BatchNormKernelBase::DispatchData BatchNormKernelBase::SetDefault(const batch_norm_params& params) const {
DispatchData kd;
kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
kd.gws0 = params.inputs[0].Batch().v;
kd.gws1 = params.inputs[0].Feature().v;
kd.gws2 = 1;
kd.lws0 = std::min(std::max(kd.gws0, static_cast<size_t>(1)), static_cast<size_t>(256));
while (kd.gws0 % kd.lws0 != 0) {
--kd.lws0;
}
kd.lws1 = 1;
kd.lws2 = 1;
return kd;
}
KernelsData BatchNormKernelBase::GetCommonKernelsData(const Params& params,
const optional_params& options,
float estimatedTime) const {
if (!Validate(params, options)) {
return {};
}
const batch_norm_params& orgParams = static_cast<const batch_norm_params&>(params);
DispatchData runInfo = SetDefault(orgParams);
KernelData kd = KernelData::Default<batch_norm_params>(params);
auto cldnn_jit = GetJitConstants(orgParams);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
int inputs_num = 1 + orgParams.batchNormParams.with_inv_var + 2 * orgParams.batchNormParams.with_scale_shift +
2 * orgParams.batchNormParams.with_mean_var_out;
FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, "", false, false, inputs_num);
kd.estimatedTime = estimatedTime;
return {kd};
}
} // namespace kernel_selector
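SetDefault above picks lws0 as the largest divisor of the batch dimension that does not exceed 256. A standalone sketch of that rule (illustrative only; the helper name is not part of the library):

#include <algorithm>
#include <cstddef>

// Largest divisor of gws0 not exceeding 256, mirroring SetDefault above.
static std::size_t pick_lws0(std::size_t gws0) {
    std::size_t lws0 = std::min(std::max(gws0, static_cast<std::size_t>(1)), static_cast<std::size_t>(256));
    while (gws0 % lws0 != 0) {
        --lws0;
    }
    return lws0;
}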

View File

@ -1,66 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "common_kernel_base.h"
#include "kernel_selector_params.h"
namespace kernel_selector {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// batch_norm_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct batch_norm_params : public base_params {
batch_norm_params() : base_params(KernelType::BATCH_NORM_GRAD) {}
struct DedicatedParams {
float epsilon;
bool with_inv_var;
bool with_scale_shift;
bool with_mean_var_out = false;
};
DedicatedParams batchNormParams;
virtual ParamsKey GetParamsKey() const {
return base_params::GetParamsKey();
}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// batch_norm_optional_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct batch_norm_optional_params : optional_params {
batch_norm_optional_params() : optional_params(KernelType::BATCH_NORM_GRAD) {}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// BatchNormKernelBase
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class BatchNormKernelBase : public common_kernel_base {
public:
using common_kernel_base::common_kernel_base;
virtual ~BatchNormKernelBase() {}
using DispatchData = CommonDispatchData;
protected:
bool Validate(const Params& params, const optional_params& options) const override;
KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimatedTime) const;
virtual JitConstants GetJitConstants(const batch_norm_params& params) const;
virtual DispatchData SetDefault(const batch_norm_params& params) const;
};
} // namespace kernel_selector

View File

@ -1,41 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "batch_norm_kernel_ref.h"
namespace kernel_selector {
ParamsKey BatchNormKernelRef::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableInputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::INT8);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableInputLayout(DataLayout::byxf);
k.EnableInputLayout(DataLayout::yxfb);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::byxf);
k.EnableOutputLayout(DataLayout::yxfb);
k.EnableBatching();
return k;
}
KernelsData BatchNormKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
return GetCommonKernelsData(params, options, FORCE_PRIORITY_9);
}
} // namespace kernel_selector

View File

@ -1,30 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "batch_norm_kernel_base.h"
namespace kernel_selector {
class BatchNormKernelRef : public BatchNormKernelBase {
public:
BatchNormKernelRef() : BatchNormKernelBase("batch_norm_gpu_ref") {}
virtual ~BatchNormKernelRef() {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
};
} // namespace kernel_selector

View File

@ -1,29 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "batch_norm_kernel_selector.h"
#include "batch_norm_kernel_ref.h"
namespace kernel_selector {
batch_norm_kernel_selector::batch_norm_kernel_selector() {
Attach<BatchNormKernelRef>();
}
KernelsData batch_norm_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
return GetNaiveBestKernel(params, options, KernelType::BATCH_NORM_GRAD);
}
} // namespace kernel_selector

View File

@ -1,35 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "kernel_selector.h"
namespace kernel_selector {
class batch_norm_kernel_selector : public kernel_selector_base {
public:
static batch_norm_kernel_selector& Instance() {
static batch_norm_kernel_selector instance_;
return instance_;
}
batch_norm_kernel_selector();
virtual ~batch_norm_kernel_selector() {}
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};
} // namespace kernel_selector
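The selector follows the singleton pattern used throughout kernel_selector; a hedged usage sketch, assuming bn_params and bn_options (hypothetical names) are already populated batch_norm_params / batch_norm_optional_params instances:

// Query the best available batch-norm kernels for the prepared parameters.
auto& selector = kernel_selector::batch_norm_kernel_selector::Instance();
kernel_selector::KernelsData kernels = selector.GetBestKernels(bn_params, bn_options);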

View File

@ -1,72 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "batch_norm_grad_kernel_base.h"
namespace kernel_selector {
bool BatchNormGradKernelBase::Validate(const Params& p, const optional_params& o) const {
if (p.GetType() != KernelType::BATCH_NORM_GRAD ||
o.GetType() != KernelType::BATCH_NORM_GRAD) {
return false;
}
return true;
}
JitConstants BatchNormGradKernelBase::GetJitConstants(const batch_norm_grad_params& params) const {
JitConstants jit = MakeBaseParamsJitConstants(params);
return jit;
}
BatchNormGradKernelBase::DispatchData BatchNormGradKernelBase::SetDefault(const batch_norm_grad_params& params) const {
DispatchData kd;
kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
kd.gws0 = params.inputs[0].Batch().v;
kd.gws1 = params.inputs[0].Feature().v;
kd.gws2 = 1;
kd.lws0 = params.inputs[0].Batch().v;
kd.lws1 = 1;
kd.lws2 = 1;
return kd;
}
KernelsData BatchNormGradKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, float estimatedTime) const {
if (!Validate(params, options)) {
return {};
}
const batch_norm_grad_params& orgParams = static_cast<const batch_norm_grad_params&>(params);
DispatchData runInfo = SetDefault(orgParams);
KernelData kd = KernelData::Default<batch_norm_grad_params>(params);
auto cldnn_jit = GetJitConstants(orgParams);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, "", false, false, 3);
kd.estimatedTime = estimatedTime;
return {kd};
}
} // namespace kernel_selector

View File

@ -1,57 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "common_kernel_base.h"
#include "kernel_selector_params.h"
namespace kernel_selector {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// batch_norm_grad_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct batch_norm_grad_params : public base_params {
batch_norm_grad_params() : base_params(KernelType::BATCH_NORM_GRAD) {}
virtual ParamsKey GetParamsKey() const {
return base_params::GetParamsKey();
}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// batch_norm_grad_optional_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct batch_norm_grad_optional_params : optional_params {
batch_norm_grad_optional_params() : optional_params(KernelType::BATCH_NORM_GRAD) {}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// BatchNormGradKernelBase
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class BatchNormGradKernelBase : public common_kernel_base {
public:
using common_kernel_base::common_kernel_base;
virtual ~BatchNormGradKernelBase() {}
using DispatchData = CommonDispatchData;
protected:
bool Validate(const Params& params, const optional_params& options) const override;
KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimatedTime) const;
virtual JitConstants GetJitConstants(const batch_norm_grad_params& params) const;
virtual DispatchData SetDefault(const batch_norm_grad_params& params) const;
};
} // namespace kernel_selector

View File

@ -1,41 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "batch_norm_grad_kernel_ref.h"
namespace kernel_selector {
ParamsKey BatchNormGradKernelRef::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableInputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::INT8);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableInputLayout(DataLayout::byxf);
k.EnableInputLayout(DataLayout::yxfb);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::byxf);
k.EnableOutputLayout(DataLayout::yxfb);
k.EnableBatching();
return k;
}
KernelsData BatchNormGradKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
return GetCommonKernelsData(params, options, FORCE_PRIORITY_9);
}
} // namespace kernel_selector

View File

@ -1,30 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "batch_norm_grad_kernel_base.h"
namespace kernel_selector {
class BatchNormGradKernelRef : public BatchNormGradKernelBase {
public:
BatchNormGradKernelRef() : BatchNormGradKernelBase("batch_norm_grad_gpu_ref") {}
virtual ~BatchNormGradKernelRef() {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
};
} // namespace kernel_selector

View File

@ -1,29 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "batch_norm_grad_kernel_selector.h"
#include "batch_norm_grad_kernel_ref.h"
namespace kernel_selector {
batch_norm_grad_kernel_selector::batch_norm_grad_kernel_selector() {
Attach<BatchNormGradKernelRef>();
}
KernelsData batch_norm_grad_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
return GetNaiveBestKernel(params, options, KernelType::BATCH_NORM_GRAD);
}
} // namespace kernel_selector

View File

@ -1,35 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "kernel_selector.h"
namespace kernel_selector {
class batch_norm_grad_kernel_selector : public kernel_selector_base {
public:
static batch_norm_grad_kernel_selector& Instance() {
static batch_norm_grad_kernel_selector instance_;
return instance_;
}
batch_norm_grad_kernel_selector();
virtual ~batch_norm_grad_kernel_selector() {}
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};
} // namespace kernel_selector

View File

@ -1,111 +0,0 @@
// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "contract_kernel_base.h"
#include <vector>
#include "kernel_selector_utils.h"
namespace kernel_selector {
JitConstants ContractKernelBase::GetJitConstants(const contract_params& params) const {
JitConstants jit = MakeBaseParamsJitConstants(params);
const size_t no_dim_flag = 6;
std::vector<size_t> output_dims(4, no_dim_flag);
int out_dim = 2;
for (int i = 3; i >= 0; --i) {
if (std::find(params.reduction_axes.begin(), params.reduction_axes.end(), i) == params.reduction_axes.end())
output_dims.at(i) = out_dim--;
}
if (output_dims[3] != no_dim_flag)
jit.AddConstants({MakeJitConstant("DIM_X", output_dims.at(3))});
if (output_dims[2] != no_dim_flag)
jit.AddConstants({MakeJitConstant("DIM_Y", output_dims.at(2))});
if (output_dims[1] != no_dim_flag)
jit.AddConstants({MakeJitConstant("DIM_F", output_dims.at(1))});
if (output_dims[0] != no_dim_flag)
jit.AddConstants({MakeJitConstant("DIM_B", output_dims.at(0))});
jit.AddConstants({MakeJitConstant("REDUCE_X", output_dims.at(3) == no_dim_flag),
MakeJitConstant("REDUCE_Y", output_dims.at(2) == no_dim_flag),
MakeJitConstant("REDUCE_F", output_dims.at(1) == no_dim_flag),
MakeJitConstant("REDUCE_B", output_dims.at(0) == no_dim_flag)});
switch (params.mode) {
case ContractMode::SUM:
jit.AddConstants({MakeJitConstant("REDUCE_SEED", "0"), MakeJitConstant("REDUCE_OPERATION(a, b)", "a + b")});
break;
case ContractMode::PRODUCT:
jit.AddConstants({MakeJitConstant("REDUCE_SEED", "1"), MakeJitConstant("REDUCE_OPERATION(a, b)", "a * b")});
break;
case ContractMode::ALL:
jit.AddConstants(
{MakeJitConstant("REDUCE_SEED", "1"), MakeJitConstant("REDUCE_OPERATION(a, b)", "a && b")});
break;
case ContractMode::ANY:
jit.AddConstants(
{MakeJitConstant("REDUCE_SEED", "0"), MakeJitConstant("REDUCE_OPERATION(a, b)", "a || b")});
break;
case ContractMode::MAX:
jit.AddConstants({MakeJitConstant("REDUCE_SEED", "UNIT_VAL_MIN"),
MakeJitConstant("REDUCE_OPERATION(a, b)", "UNIT_MAX_FUNC(a,b)")});
break;
}
return jit;
}
ContractKernelBase::DispatchData ContractKernelBase::SetDefault(const contract_params& params) {
const auto& output = params.output;
DispatchData kd;
kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
std::vector<size_t> global{output.Feature().v, output.Y().v, output.X().v};
const auto& local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
kd.gws0 = global[0];
kd.gws1 = global[1];
kd.gws2 = global[2];
kd.lws0 = local[0];
kd.lws1 = local[1];
kd.lws2 = local[2];
return kd;
}
KernelsData ContractKernelBase::GetCommonKernelsData(const Params& params,
const optional_params& options,
float estimated_time) const {
assert(params.GetType() == KernelType::CONTRACT);
const auto& prim_params =
static_cast<const contract_params&>(params);
auto run_info = SetDefault(prim_params);
KernelData k_data = KernelData::Default<contract_params>(params);
auto cldnn_jit = GetJitConstants(prim_params);
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = k_data.kernels[0];
FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point);
k_data.estimatedTime = estimated_time;
return {k_data};
}
} // namespace kernel_selector
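The loop in GetJitConstants above packs the non-reduced axes into the lowest output dimensions and flags the reduced ones with no_dim_flag. A standalone sketch of that mapping (illustrative only; the function name is not part of the library):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

// Axes listed in reduction_axes are reduced; the remaining ones are packed into dims 0..2.
std::vector<std::size_t> map_output_dims(const std::vector<std::uint16_t>& reduction_axes) {
    const std::size_t no_dim_flag = 6;                     // marks a reduced axis
    std::vector<std::size_t> output_dims(4, no_dim_flag);  // order: b, f, y, x
    int out_dim = 2;
    for (int i = 3; i >= 0; --i) {
        if (std::find(reduction_axes.begin(), reduction_axes.end(), i) == reduction_axes.end())
            output_dims.at(i) = out_dim--;
    }
    return output_dims;  // e.g. {6, 6, 1, 2} for reduction_axes = {0, 1}
}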

View File

@ -1,52 +0,0 @@
// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "common_kernel_base.h"
#include "kernel_selector_params.h"
#include <vector>
namespace kernel_selector {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// contract_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct contract_params : public base_params {
contract_params() : base_params(KernelType::CONTRACT), mode(ContractMode::ANY) {}
ContractMode mode;
std::vector<uint16_t> reduction_axes;
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// contract_optional_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct contract_optional_params : optional_params {
contract_optional_params() : optional_params(KernelType::CONTRACT) {}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// ContractKernelBase
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class ContractKernelBase : public common_kernel_base {
public:
using common_kernel_base::common_kernel_base;
using DispatchData = CommonDispatchData;
protected:
JitConstants GetJitConstants(const contract_params& params) const;
static DispatchData SetDefault(const contract_params& params);
KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const;
};
} // namespace kernel_selector

View File

@ -1,49 +0,0 @@
// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "contract_kernel_ref.h"
namespace kernel_selector {
ParamsKey ContractKernelRef::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableInputDataType(Datatype::INT8);
k.EnableInputDataType(Datatype::UINT8);
k.EnableInputDataType(Datatype::INT32);
k.EnableInputDataType(Datatype::INT64);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::INT32);
k.EnableOutputDataType(Datatype::INT64);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBatching();
return k;
}
KernelsData ContractKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
return GetCommonKernelsData(params, options, FORCE_PRIORITY_9);
}
} // namespace kernel_selector

View File

@ -1,27 +0,0 @@
// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "contract_kernel_base.h"
namespace kernel_selector {
class ContractKernelRef : public ContractKernelBase {
public:
ContractKernelRef() : ContractKernelBase("contract_ref") {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
};
} // namespace kernel_selector

View File

@ -1,24 +0,0 @@
// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "contract_kernel_selector.h"
#include "contract_kernel_ref.h"
namespace kernel_selector {
contract_kernel_selector::contract_kernel_selector() { Attach<ContractKernelRef>(); }
KernelsData contract_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
return GetNaiveBestKernel(params, options, KernelType::CONTRACT);
}
} // namespace kernel_selector

View File

@ -1,31 +0,0 @@
// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "kernel_selector.h"
namespace kernel_selector {
class contract_kernel_selector : public kernel_selector_base {
public:
static contract_kernel_selector& Instance() {
static contract_kernel_selector instance;
return instance;
}
contract_kernel_selector();
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};
} // namespace kernel_selector

View File

@ -1,87 +0,0 @@
/*
// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "convolution_kernel_byx8_f4__fs_bs_yx_bsv4_fsv32.h"
#include "kernel_selector_utils.h"
namespace kernel_selector {
ParamsKey ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputLayout(DataLayout::byx8_f4);
k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableDilation();
k.EnableBiasPerFeature();
k.EnableBatching();
k.EnableQuantization(QuantizationType::SYMMETRIC);
k.DisableTuning();
return k;
}
bool ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32::Validate(const Params& p, const optional_params& o) const {
if (!Parent::Validate(p, o)) {
return false;
}
return true;
}
size_t static get_wg_batch_size(const convolution_params& params) {
if (params.inputs[0].Batch().v % 64 == 0)
return 32;
return 1;
}
ConvolutionKernelBase::DispatchData ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32::SetDefault(
const convolution_params& arg,
int) const {
DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
runInfo.efficiency = FORCE_PRIORITY_1;
runInfo.gws0 = (arg.output.Batch().v * arg.output.Feature().v) / (4 * 2);
runInfo.gws1 = arg.output.X().v / 8;
runInfo.gws2 = arg.output.Y().v / 2;
runInfo.lws0 = 8 * get_wg_batch_size(arg);
runInfo.lws1 = 1;
runInfo.lws2 = 1;
return runInfo;
}
JitConstants ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32::GetJitConstants(const convolution_params& params,
const DispatchData& kd) const {
auto jits = ConvolutionKernelBase::GetJitConstants(params, kd);
jits.AddConstant(MakeJitConstant("WG_BATCH_SIZE", get_wg_batch_size(params)));
return jits;
}
KernelsData ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32::GetKernelsData(const Params& params,
const optional_params& options) const {
KernelsData kd = GetCommonKernelsData(params, options, " -Dcl_intel_subgroups_char");
if (!kd.empty())
kd[0].estimatedTime = FORCE_PRIORITY_3;
return kd;
}
} // namespace kernel_selector
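To make the dispatch arithmetic above concrete, take hypothetical output sizes B=64, F=32, X=56, Y=56 (chosen so every division is exact): get_wg_batch_size returns 32 because 64 % 64 == 0, so gws = {(64*32)/(4*2), 56/8, 56/2} = {256, 7, 28} and lws = {8*32, 1, 1} = {256, 1, 1}. The one constraint that matters is that gws0 stays divisible by lws0:

// Illustrative check only, for the hypothetical sizes above.
static_assert(((64 * 32) / (4 * 2)) % (8 * 32) == 0,
              "example sizes must keep gws0 divisible by lws0");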

View File

@ -1,41 +0,0 @@
// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "convolution_kernel_base.h"
#include <vector>
namespace kernel_selector {
class ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32 : public ConvolutionKernelBase {
public:
using Parent = ConvolutionKernelBase;
ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32()
: ConvolutionKernelBase("convolution_gpu_byx8_f4__fs_bs_yx_bsv4_fsv32") {}
virtual ~ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32() {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
protected:
bool Validate(const Params& p, const optional_params& o) const override;
JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
ConvolutionKernelBase::DispatchData SetDefault(const convolution_params& arg, int) const override;
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override {
return WeightsLayout::os_is_y_x8_osv8_isv4_swizzled_by_4;
}
};
} // namespace kernel_selector

View File

@ -1,61 +0,0 @@
/*
// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "convolution_kernel_byxf_fs_bs_yx_bsv4_fsv32.h"
#include "kernel_selector_utils.h"
namespace kernel_selector {
ParamsKey ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputLayout(DataLayout::byxf);
k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBiasPerFeature();
k.EnableNonBiasTerm();
k.EnableBatching();
k.EnableQuantization(QuantizationType::SYMMETRIC);
k.DisableTuning();
return k;
}
ConvolutionKernelBase::DispatchData ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32::SetDefault(
const convolution_params& arg,
int) const {
DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
runInfo.efficiency = FORCE_PRIORITY_1;
runInfo.gws0 = (arg.output.Batch().v * arg.output.Feature().v) / 4;
runInfo.gws1 = arg.output.X().v / 8;
runInfo.gws2 = arg.output.Y().v;
runInfo.lws0 = 8;
runInfo.lws1 = 1;
runInfo.lws2 = 1;
return runInfo;
}
KernelsData ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32::GetKernelsData(const Params& params,
const optional_params& options) const {
return GetCommonKernelsData(params, options);
}
} // namespace kernel_selector
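With hypothetical output sizes B=4, F=256, X=16, Y=16, the dispatch above becomes gws = {(4*256)/4, 16/8, 16} = {256, 2, 16} with lws = {8, 1, 1}; the implicit assumptions are that B*F divides by 4, X divides by 8, and gws0 divides by the sub-group width of 8:

// Illustrative check only, for the hypothetical sizes above.
static_assert(((4 * 256) / 4) % 8 == 0 && 16 % 8 == 0,
              "example sizes must divide evenly into the dispatch");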

View File

@ -1,37 +0,0 @@
// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "convolution_kernel_base.h"
#include <vector>
namespace kernel_selector {
class ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32 : public ConvolutionKernelBase {
public:
ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32() : ConvolutionKernelBase("convolution_gpu_byxf_fs_bs_yx_bsv4_fsv32") {}
virtual ~ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32() {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
protected:
ConvolutionKernelBase::DispatchData SetDefault(const convolution_params& arg, int) const override;
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override {
return WeightsLayout::yxio;
}
};
} // namespace kernel_selector

View File

@ -1,108 +0,0 @@
/*
// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "convolution_kernel_mmad_1x1_gemm.h"
namespace kernel_selector {
ParamsKey ConvolutionKernel_mmad_1x1_gemm::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputLayout(DataLayout::byxf_af32);
k.EnableOutputLayout(DataLayout::byxf_af32);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableDilation();
k.EnableBiasPerFeature();
k.EnableBiasPerOutput();
k.EnableNonBiasTerm();
k.EnableBatching();
k.EnableSplitSupport();
k.EnableDepthwiseSeparableOpt();
k.EnableQuantization(QuantizationType::SYMMETRIC);
k.DisableTuning();
return k;
}
bool ConvolutionKernel_mmad_1x1_gemm::Validate(const Params& p, const optional_params& o) const {
if (!ConvolutionKernelBase::Validate(p, o)) {
return false;
}
const auto& params = static_cast<const convolution_params&>(p);
if (params.filterSize.x != 1 || params.filterSize.y != 1)
return false;
if (params.stride.x != 1 || params.stride.y != 1)
return false;
if (params.padding.x != 0 || params.padding.y != 0)
return false;
const auto& input = params.inputs[0];
// we do not support padded input
if (input.X().pad.Total() != 0 || input.Y().pad.Total() != 0)
return false;
if (params.split != 1)
return false;
return true;
}
ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_1x1_gemm::SetDefault(const convolution_params& arg, int) const {
DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
// Sub-group size used by "convolution_1x1_gemm_MMAD" kernel.
constexpr size_t sub_group_size = 8;
const auto of_maps = arg.output.Feature().v;
const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size);
runInfo.efficiency = FORCE_PRIORITY_2;
runInfo.gws0 = RoundUp(arg.output.X().v * arg.output.Y().v, 8) / 8;
runInfo.gws1 = of_threads_per_batch * arg.output.Batch().v;
runInfo.gws2 = 1;
runInfo.lws0 = 1;
runInfo.lws1 = sub_group_size;
runInfo.lws2 = 1;
return runInfo;
}
JitConstants ConvolutionKernel_mmad_1x1_gemm::GetJitConstants(const convolution_params& params, const DispatchData& runInfo) const {
auto jit = Parent::GetJitConstants(params, runInfo);
jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws1));
// pitch for special block format used in this kernel
const size_t ifm_32_aligned = Align(params.weights.IFM().v, 32);
const size_t filter_ofm_block_pitch = (ifm_32_aligned / 32) * params.weights.X().v * params.weights.Y().v * 4 * 8 * 8;
jit.AddConstant(MakeJitConstant("FILTER_OFM_BLOCK_PITCH", filter_ofm_block_pitch));
return jit;
}
KernelsData ConvolutionKernel_mmad_1x1_gemm::GetKernelsData(const Params& params, const optional_params& options) const {
return GetTunedKernelsDataByIndex(params, options);
}
} // namespace kernel_selector
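For a hypothetical 1x1 convolution with IFM=64, OFM=96 and a 14x14 output at batch 1, the formulas above give FILTER_OFM_BLOCK_PITCH = (64/32)*1*1*4*8*8 = 512, of_threads_per_batch = RoundUp(96, 8) = 96, gws = {RoundUp(196, 8)/8, 96, 1} = {25, 96, 1} and lws = {1, 8, 1}:

// Illustrative checks only, for the hypothetical sizes above.
static_assert((64 / 32) * 1 * 1 * 4 * 8 * 8 == 512, "filter OFM block pitch");
static_assert((14 * 14 + 7) / 8 == 25, "gws0 = RoundUp(14 * 14, 8) / 8");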

View File

@ -1,40 +0,0 @@
// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "convolution_kernel_base.h"
#include <vector>
namespace kernel_selector {
class ConvolutionKernel_mmad_1x1_gemm : public ConvolutionKernelBase {
public:
using Parent = ConvolutionKernelBase;
ConvolutionKernel_mmad_1x1_gemm() : ConvolutionKernelBase("convolution_gpu_1x1_gemm_MMAD") {}
virtual ~ConvolutionKernel_mmad_1x1_gemm() {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
protected:
JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
bool Validate(const Params& p, const optional_params& o) const override;
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override {
return WeightsLayout::os_is_yx_isa8_osv8_isv4;
}
};
} // namespace kernel_selector

View File

@ -1,180 +0,0 @@
/*
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "convolution_kernel_mmad_32x32sg_128x128wg_slm_int8.h"
#include "kernel_selector_utils.h"
namespace kernel_selector {
static const size_t _SG_TILE_M = 32;
static const size_t _SG_TILE_N = 32;
static const size_t _SG_SIZE = 8; // sub group size
static const size_t _TILES_PER_SG_X = 1; // Persistent threads
static const size_t _TILES_PER_SG_Y = 1; // Persistent threads
ParamsKey ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBiasPerFeature();
k.EnableBatching();
k.EnableQuantization(QuantizationType::SYMMETRIC);
k.DisableTuning();
return k;
}
bool ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8::Validate(const Params& p, const optional_params& o) const {
if (!ConvolutionKernelBase::Validate(p, o) || !CovolutionCheckInput(p, o)) {
return false;
}
const convolution_params& cp = static_cast<const convolution_params&>(p);
// make sure it's 1x1 conv
if (cp.filterSize.x != 1 || cp.filterSize.y != 1)
return false;
// make sure stride is 1x1
if (cp.stride.x != 1 || cp.stride.y != 1)
return false;
// input padding not supported
if (cp.inputs[0].X().pad.Total() != 0 || cp.inputs[0].Y().pad.Total() != 0 ||
cp.inputs[0].Feature().pad.Total() != 0 || cp.inputs[0].Batch().pad.Total() != 0)
return false;
// input and output spatial sizes must match
if (!(cp.output.X().v == cp.inputs[0].X().v) || !(cp.output.Y().v == cp.inputs[0].Y().v))
return false;
const auto m = cp.output.X().v * cp.output.Y().v * cp.output.Batch().v;
const auto k = cp.inputs[0].Feature().v;
const auto n = cp.output.Feature().v;
if (m % 32 != 0 && m % 128 != 0) // Matrix size M, Must be multiple of 32 and multiple of WG_TILE_M=128
return false;
if (k % 32 != 0) // Matrix size K, Must be multiple of 32
return false;
if (n % 32 != 0 && n % 128 != 0) // Matrix size N, Must be multiple of 32 and multiple of WG_TILE_N=128
return false;
return true;
}
ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8::SetDefault(
const convolution_params& arg,
int) const {
DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
runInfo.efficiency = FORCE_PRIORITY_1;
size_t mat_m = arg.output.X().v * arg.output.Y().v * arg.output.Batch().v;
size_t mat_n = arg.output.Feature().v;
size_t _MATRIX_M = mat_m;
size_t _MATRIX_N = mat_n;
size_t _WG_TILE_M = 128;
size_t _WG_TILE_N = 128;
// Calculate number of threads needed
const size_t threadsX = (_MATRIX_N / (_SG_TILE_N / _SG_SIZE)) / _TILES_PER_SG_X;
const size_t threadsY = (_MATRIX_M / _SG_TILE_M) / _TILES_PER_SG_Y;
// Define execution setup for kernel:
size_t globalWorkSize[3] = {threadsX, threadsY, 1};
size_t localWorkSize[3] = {_SG_SIZE * _WG_TILE_N / _SG_TILE_N, _WG_TILE_M / _SG_TILE_M, 1};
runInfo.gws0 = globalWorkSize[0];
runInfo.gws1 = globalWorkSize[1];
runInfo.gws2 = globalWorkSize[2];
runInfo.lws0 = localWorkSize[0];
runInfo.lws1 = localWorkSize[1];
runInfo.lws2 = localWorkSize[2];
return runInfo;
}
JitConstants ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8::GetJitConstants(const convolution_params& params,
const DispatchData& runInfo) const {
auto jit = Parent::GetJitConstants(params, runInfo);
jit.AddConstant(MakeJitConstant("WG_TILE_M", 128)); // Work-Group tile size M, Must be mutliple of 32
jit.AddConstant(MakeJitConstant("WG_TILE_N", 128)); // Work-Group tile size N, Must be mutliple of 32
jit.AddConstant(MakeJitConstant("TILES_PER_SG_X", 1)); // Persistent threads
jit.AddConstant(MakeJitConstant("TILES_PER_SG_Y", 1)); // Persistent threads
// Do not change values below
jit.AddConstant(MakeJitConstant("DIM_X", 0));
jit.AddConstant(MakeJitConstant("DIM_Y", 1));
jit.AddConstant(MakeJitConstant("MATRIX_SMALL_K", 32));
jit.AddConstant(MakeJitConstant("MATRIX_SMALL_K_BFLOAT", 16));
jit.AddConstant(MakeJitConstant("SG_TILE_M", _SG_TILE_M));
jit.AddConstant(MakeJitConstant("SG_TILE_N", _SG_TILE_N));
jit.AddConstant(MakeJitConstant("SG_SIZE", _SG_SIZE));
jit.AddConstant(MakeJitConstant("SIMD_LANE_M", "SG_TILE_M"));
jit.AddConstant(MakeJitConstant("SIMD_LANE_N", "(SG_TILE_N / SG_SIZE)"));
jit.AddConstant(MakeJitConstant("WG_SIZE", "(SG_SIZE * WG_TILE_N / SG_TILE_N) * (WG_TILE_M / SG_TILE_M)"));
jit.AddConstant(MakeJitConstant("COMPILE_KERNELS", ""));
jit.AddConstant(MakeJitConstant("TILED_GLOBAL_LAYOUT", ""));
jit.AddConstant(MakeJitConstant("OUTPUT_TILED_GLOBAL_LAYOUT", ""));
const auto& input = params.inputs[0];
const auto& output = params.output;
auto m = output.X().v * output.Y().v * output.Batch().v;
auto k = input.Feature().v;
auto n = output.Feature().v;
jit.AddConstant(MakeJitConstant("MATRIX_M", m));
jit.AddConstant(MakeJitConstant("MATRIX_K", k));
jit.AddConstant(MakeJitConstant("MATRIX_N", n));
const size_t out_x_pitch = 32 * 4;
const size_t out_y_pitch = 32 * 4 * params.output.X().LogicalDimPadded();
const size_t out_b_block_pitch = out_y_pitch * params.output.Y().LogicalDimPadded();
const size_t out_f_block_pitch = out_b_block_pitch * ((params.output.Batch().v + 3) / 4);
const size_t out_offset = out_x_pitch * params.output.X().pad.before + out_y_pitch * params.output.Y().pad.before;
jit.AddConstant(MakeJitConstant("OUT_X_PITCH", out_x_pitch));
jit.AddConstant(MakeJitConstant("OUT_Y_PITCH", out_y_pitch));
jit.AddConstant(MakeJitConstant("OUT_B_BLOCK_PITCH", out_b_block_pitch));
jit.AddConstant(MakeJitConstant("OUT_F_BLOCK_PITCH", out_f_block_pitch));
jit.AddConstant(MakeJitConstant("OUT_OFFSET", out_offset));
bool out_padding = output.X().pad.Total() != 0 || output.Y().pad.Total() != 0;
jit.AddConstant(MakeJitConstant("OUT_WITH_PADDING", out_padding));
return jit;
}
KernelsData ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8::GetKernelsData(const Params& params,
const optional_params& options) const {
KernelsData kd = GetCommonKernelsData(params, options);
if (!kd.empty())
kd[0].estimatedTime = FORCE_PRIORITY_1; // _3
return kd;
}
} // namespace kernel_selector
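The GEMM-style dispatch above is easier to read with concrete numbers. Assuming a hypothetical problem where MATRIX_M = X*Y*B = 256 and MATRIX_N = OFM = 256 (both multiples of the 128x128 work-group tile), threadsX = (256/(32/8))/1 = 64, threadsY = 256/32 = 8, and the local size is {8*128/32, 128/32, 1} = {32, 4, 1}, i.e. 128 work items per work-group with each 8-wide sub-group producing one 32x32 output tile:

// Illustrative checks only, for the hypothetical MATRIX_M = MATRIX_N = 256 case.
static_assert(((256 / (32 / 8)) / 1) % (8 * 128 / 32) == 0, "gws0 divisible by lws0");
static_assert(((256 / 32) / 1) % (128 / 32) == 0, "gws1 divisible by lws1");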

View File

@ -1,42 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "convolution_kernel_base.h"
#include <vector>
namespace kernel_selector {
class ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8 : public ConvolutionKernelBase {
public:
using Parent = ConvolutionKernelBase;
ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8()
: ConvolutionKernelBase("convolution_gpu_mmad_32x32sg_128x128wg_slm_int8") {}
virtual ~ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8() {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
protected:
bool Validate(const Params& p, const optional_params& o) const override;
JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override {
return WeightsLayout::is_o32_yx_isv32_swizzled_by_4;
}
};
} // namespace kernel_selector

View File

@ -1,180 +0,0 @@
/*
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "convolution_kernel_mmad_32x32sg_224x128wg_slm_int8.h"
#include "kernel_selector_utils.h"
namespace kernel_selector {
static const size_t _SG_TILE_M = 32;
static const size_t _SG_TILE_N = 32;
static const size_t _SG_SIZE = 8; // sub group size
static const size_t _TILES_PER_SG_X = 1; // Persistent threads
static const size_t _TILES_PER_SG_Y = 1; // Persistent threads
ParamsKey ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBiasPerFeature();
k.EnableBatching();
k.EnableQuantization(QuantizationType::SYMMETRIC);
k.DisableTuning();
return k;
}
bool ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8::Validate(const Params& p, const optional_params& o) const {
if (!ConvolutionKernelBase::Validate(p, o) || !CovolutionCheckInput(p, o)) {
return false;
}
const convolution_params& cp = static_cast<const convolution_params&>(p);
// make sure it's 1x1 conv
if (cp.filterSize.x != 1 || cp.filterSize.y != 1)
return false;
// make sure stride is 1x1
if (cp.stride.x != 1 || cp.stride.y != 1)
return false;
// input padding not supported
if (cp.inputs[0].X().pad.Total() != 0 || cp.inputs[0].Y().pad.Total() != 0 ||
cp.inputs[0].Feature().pad.Total() != 0 || cp.inputs[0].Batch().pad.Total() != 0)
return false;
// input and output spatial sizes must match
if (!(cp.output.X().v == cp.inputs[0].X().v) || !(cp.output.Y().v == cp.inputs[0].Y().v))
return false;
const auto m = cp.output.X().v * cp.output.Y().v * cp.output.Batch().v;
const auto k = cp.inputs[0].Feature().v;
const auto n = cp.output.Feature().v;
if (m % 32 != 0 && m % 224 != 0) // Matrix size M, Must be multiple of 32 and multiple of WG_TILE_M=224
return false;
if (k % 32 != 0) // Matrix size K, Must be multiple of 32
return false;
if (n % 32 != 0 && n % 128 != 0) // Matrix size N, Must be multiple of 32 and multiple of WG_TILE_N=128
return false;
return true;
}
ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8::SetDefault(
const convolution_params& arg,
int) const {
DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
runInfo.efficiency = FORCE_PRIORITY_1;
size_t mat_m = arg.output.X().v * arg.output.Y().v * arg.output.Batch().v;
size_t mat_n = arg.output.Feature().v;
size_t _MATRIX_M = mat_m;
size_t _MATRIX_N = mat_n;
size_t _WG_TILE_M = 224;
size_t _WG_TILE_N = 128;
// Calculate number of threads needed
const size_t threadsX = (_MATRIX_N / (_SG_TILE_N / _SG_SIZE)) / _TILES_PER_SG_X;
const size_t threadsY = (_MATRIX_M / _SG_TILE_M) / _TILES_PER_SG_Y;
// Define execution setup for kernel:
size_t globalWorkSize[3] = {threadsX, threadsY, 1};
size_t localWorkSize[3] = {_SG_SIZE * _WG_TILE_N / _SG_TILE_N, _WG_TILE_M / _SG_TILE_M, 1};
runInfo.gws0 = globalWorkSize[0];
runInfo.gws1 = globalWorkSize[1];
runInfo.gws2 = globalWorkSize[2];
runInfo.lws0 = localWorkSize[0];
runInfo.lws1 = localWorkSize[1];
runInfo.lws2 = localWorkSize[2];
return runInfo;
}
JitConstants ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8::GetJitConstants(const convolution_params& params,
const DispatchData& runInfo) const {
auto jit = Parent::GetJitConstants(params, runInfo);
jit.AddConstant(MakeJitConstant("WG_TILE_M", 224)); // Work-Group tile size M, Must be mutliple of 32
jit.AddConstant(MakeJitConstant("WG_TILE_N", 128)); // Work-Group tile size N, Must be mutliple of 32
jit.AddConstant(MakeJitConstant("TILES_PER_SG_X", _TILES_PER_SG_X));
jit.AddConstant(MakeJitConstant("TILES_PER_SG_Y", _TILES_PER_SG_Y));
// Do not change values below
jit.AddConstant(MakeJitConstant("DIM_X", 0));
jit.AddConstant(MakeJitConstant("DIM_Y", 1));
jit.AddConstant(MakeJitConstant("MATRIX_SMALL_K", 32));
jit.AddConstant(MakeJitConstant("MATRIX_SMALL_K_BFLOAT", 16));
jit.AddConstant(MakeJitConstant("SG_TILE_M", _SG_TILE_M));
jit.AddConstant(MakeJitConstant("SG_TILE_N", _SG_TILE_N));
jit.AddConstant(MakeJitConstant("SG_SIZE", _SG_SIZE));
jit.AddConstant(MakeJitConstant("SIMD_LANE_M", "SG_TILE_M"));
jit.AddConstant(MakeJitConstant("SIMD_LANE_N", "(SG_TILE_N / SG_SIZE)"));
jit.AddConstant(MakeJitConstant("WG_SIZE", "(SG_SIZE * WG_TILE_N / SG_TILE_N) * (WG_TILE_M / SG_TILE_M)"));
jit.AddConstant(MakeJitConstant("COMPILE_KERNELS", ""));
jit.AddConstant(MakeJitConstant("TILED_GLOBAL_LAYOUT", ""));
jit.AddConstant(MakeJitConstant("OUTPUT_TILED_GLOBAL_LAYOUT", ""));
const auto& input = params.inputs[0];
const auto& output = params.output;
auto m = output.X().v * output.Y().v * output.Batch().v;
auto k = input.Feature().v;
auto n = output.Feature().v;
jit.AddConstant(MakeJitConstant("MATRIX_M", m)); // Matrix size M, Must be mutliple of 32 and multiple of WG_TILE_M
jit.AddConstant(MakeJitConstant("MATRIX_K", k)); // Matrix size K, Must be mutliple of 32
jit.AddConstant(MakeJitConstant("MATRIX_N", n)); // Matrix size N, Must be mutliple of 32 and multiple of WG_TILE_N
const size_t out_x_pitch = 32 * 4;
const size_t out_y_pitch = 32 * 4 * params.output.X().LogicalDimPadded();
const size_t out_b_block_pitch = out_y_pitch * params.output.Y().LogicalDimPadded();
const size_t out_f_block_pitch = out_b_block_pitch * ((params.output.Batch().v + 3) / 4);
const size_t out_offset = out_x_pitch * params.output.X().pad.before + out_y_pitch * params.output.Y().pad.before;
jit.AddConstant(MakeJitConstant("OUT_X_PITCH", out_x_pitch));
jit.AddConstant(MakeJitConstant("OUT_Y_PITCH", out_y_pitch));
jit.AddConstant(MakeJitConstant("OUT_B_BLOCK_PITCH", out_b_block_pitch));
jit.AddConstant(MakeJitConstant("OUT_F_BLOCK_PITCH", out_f_block_pitch));
jit.AddConstant(MakeJitConstant("OUT_OFFSET", out_offset));
bool out_padding = output.X().pad.Total() != 0 || output.Y().pad.Total() != 0;
jit.AddConstant(MakeJitConstant("OUT_WITH_PADDING", out_padding));
return jit;
}
KernelsData ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8::GetKernelsData(const Params& params,
const optional_params& options) const {
KernelsData kd = GetCommonKernelsData(params, options);
if (!kd.empty())
kd[0].estimatedTime = FORCE_PRIORITY_1; // _3
return kd;
}
} // namespace kernel_selector

View File

@ -1,42 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "convolution_kernel_base.h"
#include <vector>
namespace kernel_selector {
class ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8 : public ConvolutionKernelBase {
public:
using Parent = ConvolutionKernelBase;
ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8()
: ConvolutionKernelBase("convolution_gpu_mmad_32x32sg_224x128wg_slm_int8") {}
virtual ~ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8() {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
protected:
bool Validate(const Params& p, const optional_params& o) const override;
JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override {
return WeightsLayout::is_o32_yx_isv32_swizzled_by_4;
}
};
} // namespace kernel_selector

View File

@ -1,176 +0,0 @@
/*
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "convolution_kernel_mmad_32x32sg_slm_int8.h"
#include "kernel_selector_utils.h"
namespace kernel_selector {
static const size_t _SG_TILE_M = 32;
static const size_t _SG_TILE_N = 32;
static const size_t _SG_SIZE = 8; // sub group size
static const size_t _TILES_PER_SG_X = 1; // Persistent threads
static const size_t _TILES_PER_SG_Y = 1; // Persistent threads
ParamsKey ConvolutionKernel_mmad_32x32sg_slm_int8::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBiasPerFeature();
k.EnableBatching();
k.EnableQuantization(QuantizationType::SYMMETRIC);
k.DisableTuning();
return k;
}
bool ConvolutionKernel_mmad_32x32sg_slm_int8::Validate(const Params& p, const optional_params& o) const {
if (!ConvolutionKernelBase::Validate(p, o) || !CovolutionCheckInput(p, o)) {
return false;
}
const convolution_params& cp = static_cast<const convolution_params&>(p);
// make sure it's 1x1 conv
if (cp.filterSize.x != 1 || cp.filterSize.y != 1)
return false;
// make sure stride is 1x1
if (cp.stride.x != 1 || cp.stride.y != 1)
return false;
// input padding not supported
if (cp.inputs[0].X().pad.Total() != 0 || cp.inputs[0].Y().pad.Total() != 0 ||
cp.inputs[0].Feature().pad.Total() != 0 || cp.inputs[0].Batch().pad.Total() != 0)
return false;
// input and output spatial sizes must match
if (!(cp.output.X().v == cp.inputs[0].X().v) || !(cp.output.Y().v == cp.inputs[0].Y().v))
return false;
const auto m = cp.output.X().v * cp.output.Y().v * cp.output.Batch().v;
const auto k = cp.inputs[0].Feature().v;
const auto n = cp.output.Feature().v;
if (m % 32 != 0) // Matrix size M, Must be multiple of 32
return false;
if (k % 32 != 0) // Matrix size K, Must be multiple of 32
return false;
if (n % 32 != 0) // Matrix size N, Must be multiple of 32
return false;
return true;
}
ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_32x32sg_slm_int8::SetDefault(const convolution_params& arg,
int) const {
DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
runInfo.efficiency = FORCE_PRIORITY_2;
size_t mat_m = arg.output.X().v * arg.output.Y().v * arg.output.Batch().v;
size_t mat_n = arg.output.Feature().v;
size_t _MATRIX_M = mat_m;
size_t _MATRIX_N = mat_n;
size_t _WG_TILE_M = 32;
size_t _WG_TILE_N = 32;
// Calculate number of threads needed
const size_t threadsX = (_MATRIX_N / (_SG_TILE_N / _SG_SIZE)) / _TILES_PER_SG_X;
const size_t threadsY = (_MATRIX_M / _SG_TILE_M) / _TILES_PER_SG_Y;
// Define execution setup for kernel:
size_t globalWorkSize[3] = {threadsX, threadsY, 1};
size_t localWorkSize[3] = {_SG_SIZE * _WG_TILE_N / _SG_TILE_N, _WG_TILE_M / _SG_TILE_M, 1};
runInfo.gws0 = globalWorkSize[0];
runInfo.gws1 = globalWorkSize[1];
runInfo.gws2 = globalWorkSize[2];
runInfo.lws0 = localWorkSize[0];
runInfo.lws1 = localWorkSize[1];
runInfo.lws2 = localWorkSize[2];
return runInfo;
}
JitConstants ConvolutionKernel_mmad_32x32sg_slm_int8::GetJitConstants(const convolution_params& params,
const DispatchData& runInfo) const {
auto jit = Parent::GetJitConstants(params, runInfo);
jit.AddConstant(MakeJitConstant("WG_TILE_M", 32)); // Work-Group tile size M, Must be mutliple of 32
jit.AddConstant(MakeJitConstant("WG_TILE_N", 32)); // Work-Group tile size N, Must be mutliple of 32
jit.AddConstant(MakeJitConstant("TILES_PER_SG_X", _TILES_PER_SG_X));
jit.AddConstant(MakeJitConstant("TILES_PER_SG_Y", _TILES_PER_SG_Y));
// Do not change values below
jit.AddConstant(MakeJitConstant("DIM_X", 0));
jit.AddConstant(MakeJitConstant("DIM_Y", 1));
jit.AddConstant(MakeJitConstant("MATRIX_SMALL_K", 32));
jit.AddConstant(MakeJitConstant("MATRIX_SMALL_K_BFLOAT", 16));
jit.AddConstant(MakeJitConstant("SG_TILE_M", _SG_TILE_M));
jit.AddConstant(MakeJitConstant("SG_TILE_N", _SG_TILE_N));
jit.AddConstant(MakeJitConstant("SG_SIZE", _SG_SIZE));
jit.AddConstant(MakeJitConstant("SIMD_LANE_M", "SG_TILE_M"));
jit.AddConstant(MakeJitConstant("SIMD_LANE_N", "(SG_TILE_N / SG_SIZE)"));
jit.AddConstant(MakeJitConstant("WG_SIZE", "(SG_SIZE * WG_TILE_N / SG_TILE_N) * (WG_TILE_M / SG_TILE_M)"));
jit.AddConstant(MakeJitConstant("COMPILE_KERNELS", ""));
jit.AddConstant(MakeJitConstant("TILED_GLOBAL_LAYOUT", ""));
jit.AddConstant(MakeJitConstant("OUTPUT_TILED_GLOBAL_LAYOUT", ""));
const auto& input = params.inputs[0];
const auto& output = params.output;
auto m = output.X().v * output.Y().v * output.Batch().v;
auto k = input.Feature().v;
auto n = output.Feature().v;
jit.AddConstant(MakeJitConstant("MATRIX_M", m)); // Matrix size M, Must be mutliple of 32 and multiple of WG_TILE_M
jit.AddConstant(MakeJitConstant("MATRIX_K", k)); // Matrix size K, Must be mutliple of 32
jit.AddConstant(MakeJitConstant("MATRIX_N", n)); // Matrix size N, Must be mutliple of 32 and multiple of WG_TILE_N
const size_t out_x_pitch = 32 * 4;
const size_t out_y_pitch = 32 * 4 * params.output.X().LogicalDimPadded();
const size_t out_b_block_pitch = out_y_pitch * params.output.Y().LogicalDimPadded();
const size_t out_f_block_pitch = out_b_block_pitch * ((params.output.Batch().v + 3) / 4);
const size_t out_offset = out_x_pitch * params.output.X().pad.before + out_y_pitch * params.output.Y().pad.before;
jit.AddConstant(MakeJitConstant("OUT_X_PITCH", out_x_pitch));
jit.AddConstant(MakeJitConstant("OUT_Y_PITCH", out_y_pitch));
jit.AddConstant(MakeJitConstant("OUT_B_BLOCK_PITCH", out_b_block_pitch));
jit.AddConstant(MakeJitConstant("OUT_F_BLOCK_PITCH", out_f_block_pitch));
jit.AddConstant(MakeJitConstant("OUT_OFFSET", out_offset));
return jit;
}
KernelsData ConvolutionKernel_mmad_32x32sg_slm_int8::GetKernelsData(const Params& params,
const optional_params& options) const {
KernelsData kd = GetCommonKernelsData(params, options);
if (!kd.empty())
kd[0].estimatedTime = FORCE_PRIORITY_2; // _3
return kd;
}
} // namespace kernel_selector

View File

@ -1,41 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "convolution_kernel_base.h"
#include <vector>
namespace kernel_selector {
class ConvolutionKernel_mmad_32x32sg_slm_int8 : public ConvolutionKernelBase {
public:
using Parent = ConvolutionKernelBase;
ConvolutionKernel_mmad_32x32sg_slm_int8() : ConvolutionKernelBase("convolution_gpu_mmad_32x32sg_slm_int8") {}
virtual ~ConvolutionKernel_mmad_32x32sg_slm_int8() {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
protected:
bool Validate(const Params& p, const optional_params& o) const override;
JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override {
return WeightsLayout::is_o_yx_isv32;
}
};
} // namespace kernel_selector

View File

@ -35,19 +35,8 @@
#include "convolution_kernel_winograd_6x3_s1_fused.h" #include "convolution_kernel_winograd_6x3_s1_fused.h"
#include "convolution_kernel_mmad.h" #include "convolution_kernel_mmad.h"
#include "convolution_kernel_mmad_blocks.h" #include "convolution_kernel_mmad_blocks.h"
#include "convolution_kernel_mmad_1x1_gemm.h"
#include "convolution_kernel_imad_byxf_af32_depthwise.h" #include "convolution_kernel_imad_byxf_af32_depthwise.h"
#include "convolution_kernel_mmad_batched.h"
#include "convolution_kernel_bfyx_depthwise_weights_lwg.h" #include "convolution_kernel_bfyx_depthwise_weights_lwg.h"
#include "convolution_kernel_mmad_slm_2x14_rep4.h"
#include "convolution_kernel_mmad_slm_7x7_rep4.h"
#include "convolution_kernel_byxf_fs_bs_yx_bsv4_fsv32.h"
#include "convolution_kernel_mmad_batched_block.h"
#include "convolution_kernel_mmad_batched_block_1x1.h"
#include "convolution_kernel_mmad_32x32sg_128x128wg_slm_int8.h"
#include "convolution_kernel_mmad_32x32sg_224x128wg_slm_int8.h"
#include "convolution_kernel_mmad_32x32sg_slm_int8.h"
#include "convolution_kernel_byx8_f4__fs_bs_yx_bsv4_fsv32.h"
#include "convolution_kernel_imad.h" #include "convolution_kernel_imad.h"
#include "convolution_kernel_fs_byx_fsv32.h" #include "convolution_kernel_fs_byx_fsv32.h"
#include "convolution_kernel_fs_byx_fsv32_1x1.h" #include "convolution_kernel_fs_byx_fsv32_1x1.h"
@ -134,19 +123,6 @@ convolution_kernel_selector::convolution_kernel_selector() {
Attach<ConvolutionKernel_mmad_blocks>();
Attach<ConvolutionKernel_imad_byxf_af32_1x1>();
Attach<ConvolutionKernel_imad_byxf_af32_depthiwise>();
Attach<ConvolutionKernel_mmad_1x1_gemm>();
// fs_bs_yx_bsv4_fsv32 int8
Attach<ConvolutionKernel_mmad_batched>();
Attach<ConvolutionKernel_mmad_slm_2x14_rep4>();
Attach<ConvolutionKernel_mmad_slm_7x7_rep4>();
Attach<ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8>();
Attach<ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8>();
Attach<ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32>();
Attach<ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32>();
Attach<ConvolutionKernel_mmad_batched_block>();
Attach<ConvolutionKernel_mmad_batched_block_1x1>();
// Attach<ConvolutionKernel_mmad_32x32sg_slm_int8>();
// b_fs_yx_fsv4 kernels
Attach<ConvolutionKernel_imad>();

View File

@ -1,67 +0,0 @@
/*
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "convolution_grad_weights_kernel_1x1.h"
namespace kernel_selector {
ParamsKey ConvolutionGradWeightsKernel1x1::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F32);
k.EnableInputWeightsType(WeightsType::F32);
k.EnableOutputDataType(Datatype::F32);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::yxfb);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::byxf);
k.EnableSubGroup();
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBiasPerFeature();
k.EnableNonBiasTerm();
k.EnableMomentum();
k.EnableBatching();
k.EnableSplitSupport();
k.EnableGradient();
k.DisableTuning();
return k;
}
bool ConvolutionGradWeightsKernel1x1::Validate(const Params& p, const optional_params&) const {
const convolution_grad_weights_params& params = static_cast<const convolution_grad_weights_params&>(p);
if (params.filterSize.x != 1 || params.filterSize.y != 1)
return false;
return true;
}
ConvolutionGradWeightsKernelBase::DispatchData ConvolutionGradWeightsKernel1x1::SetDefault(
const convolution_grad_weights_params& params) const {
auto input_features = params.weights.IFM().v;
auto output_features = params.weights.OFM().v;
DispatchData kd;
kd.gws0 = 16;
kd.gws1 = input_features;
kd.gws2 = output_features;
kd.lws0 = 16;
kd.lws1 = 1;
kd.lws2 = 1;
kd.efficiency = FORCE_PRIORITY_8;
return kd;
}
} // namespace kernel_selector

View File

@ -1,32 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "convolution_grad_weights_kernel_base.h"
namespace kernel_selector {
class ConvolutionGradWeightsKernel1x1 : public ConvolutionGradWeightsKernelBase {
public:
ConvolutionGradWeightsKernel1x1() : ConvolutionGradWeightsKernelBase("convolution_grad_weights_1x1") {}
virtual ~ConvolutionGradWeightsKernel1x1() {}
DispatchData SetDefault(const convolution_grad_weights_params& params) const override;
bool Validate(const Params& p, const optional_params& o) const override;
ParamsKey GetSupportedKey() const override;
};
} // namespace kernel_selector

View File

@ -1,72 +0,0 @@
/*
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "convolution_grad_weights_kernel_3x3.h"
#include <algorithm>
namespace kernel_selector {
ParamsKey ConvolutionGradWeightsKernel3x3::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F32);
k.EnableInputWeightsType(WeightsType::F32);
k.EnableOutputDataType(Datatype::F32);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::yxfb);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::byxf);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBiasPerFeature();
k.EnableNonBiasTerm();
k.EnableMomentum();
k.EnableBatching();
k.EnableSplitSupport();
k.EnableGradient();
k.DisableTuning();
return k;
}
bool ConvolutionGradWeightsKernel3x3::Validate(const Params& p, const optional_params&) const {
const auto& params = static_cast<const convolution_grad_weights_params&>(p);
if (params.stride.x != 1 || params.stride.y != 1)
return false;
if (params.filterSize.x != 3 || params.filterSize.y != 3)
return false;
return true;
}
ConvolutionGradWeightsKernelBase::DispatchData ConvolutionGradWeightsKernel3x3::SetDefault(
const convolution_grad_weights_params& params) const {
auto input_features = params.weights.IFM().v;
auto output_features = params.weights.OFM().v;
DispatchData kd;
kd.gws0 = Align(output_features, 16);
kd.gws1 = input_features;
kd.gws2 = 1;
kd.lws0 = std::min(std::max(kd.gws0, static_cast<size_t>(1)), static_cast<size_t>(32));
while (kd.gws0 % kd.lws0 != 0) {
kd.lws0 -= 16;
}
kd.lws1 = 1;
kd.lws2 = 1;
kd.efficiency = FORCE_PRIORITY_8;
return kd;
}
} // namespace kernel_selector
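The shrink loop above relies on gws0 being aligned to 16, so stepping lws0 down by 16 always lands on a divisor. For a hypothetical OFM = 40: gws0 = Align(40, 16) = 48, lws0 starts at min(max(48, 1), 32) = 32, 48 % 32 != 0 drops it to 16, and 48 % 16 == 0 ends the loop:

// Illustrative check only, for the hypothetical OFM = 40 case above.
static_assert(48 % 32 != 0 && 48 % 16 == 0, "lws0 settles at 16 when gws0 = 48");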

View File

@ -1,32 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "convolution_grad_weights_kernel_base.h"
namespace kernel_selector {
class ConvolutionGradWeightsKernel3x3 : public ConvolutionGradWeightsKernelBase {
public:
ConvolutionGradWeightsKernel3x3() : ConvolutionGradWeightsKernelBase("convolution_grad_weights_3x3") {}
virtual ~ConvolutionGradWeightsKernel3x3() {}
DispatchData SetDefault(const convolution_grad_weights_params& params) const override;
bool Validate(const Params& p, const optional_params& o) const override;
ParamsKey GetSupportedKey() const override;
};
} // namespace kernel_selector

View File

@ -1,70 +0,0 @@
/*
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "convolution_grad_weights_kernel_7x7.h"
#include <algorithm>
namespace kernel_selector {
ParamsKey ConvolutionGradWeightsKernel7x7::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F32);
k.EnableInputWeightsType(WeightsType::F32);
k.EnableOutputDataType(Datatype::F32);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::yxfb);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::byxf);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBiasPerFeature();
k.EnableNonBiasTerm();
k.EnableMomentum();
k.EnableBatching();
k.EnableSplitSupport();
k.EnableGradient();
k.DisableTuning();
return k;
}
bool ConvolutionGradWeightsKernel7x7::Validate(const Params& p, const optional_params&) const {
const auto& params = static_cast<const convolution_grad_weights_params&>(p);
if (params.filterSize.x != 7 || params.filterSize.y != 7)
return false;
return true;
}
ConvolutionGradWeightsKernelBase::DispatchData ConvolutionGradWeightsKernel7x7::SetDefault(
const convolution_grad_weights_params& params) const {
auto input_features = params.weights.IFM().v;
auto output_features = params.weights.OFM().v;
DispatchData kd;
kd.gws0 = 8;
kd.gws1 = Align(output_features, 16);
kd.gws2 = input_features;
kd.lws0 = 1;
kd.lws1 = std::min(std::max(kd.gws1, static_cast<size_t>(1)), static_cast<size_t>(32));
while (kd.gws1 % kd.lws1 != 0) {
kd.lws1 -= 16;
}
kd.lws2 = 1;
kd.efficiency = FORCE_PRIORITY_8;
return kd;
}
} // namespace kernel_selector

View File

@ -1,32 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "convolution_grad_weights_kernel_base.h"
namespace kernel_selector {
class ConvolutionGradWeightsKernel7x7 : public ConvolutionGradWeightsKernelBase {
public:
ConvolutionGradWeightsKernel7x7() : ConvolutionGradWeightsKernelBase("convolution_grad_weights_7x7") {}
virtual ~ConvolutionGradWeightsKernel7x7() {}
DispatchData SetDefault(const convolution_grad_weights_params& params) const override;
bool Validate(const Params& p, const optional_params& o) const override;
ParamsKey GetSupportedKey() const override;
};
} // namespace kernel_selector

View File

@ -1,135 +0,0 @@
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "convolution_grad_weights_kernel_base.h"
#include "kernel_selector_utils.h"
#include <string>
#include <vector>
#include <algorithm>
namespace kernel_selector {
std::string convolution_grad_weights_params::to_string() const {
std::stringstream s;
s << base_params::to_string() << "_";
if (bias.empty()) {
s << "no_bias"
<< "_";
} else {
s << "bias_" << bias[0].PhysicalSize() << "_";
}
s << filterSize.x << "_" << filterSize.y << "_";
s << stride.x << "_" << stride.y << "_";
s << dilation.x << "_" << dilation.y << "_";
s << padding.x << "_" << padding.y << "_";
s << split;
return s.str();
}
JitConstants ConvolutionGradWeightsKernelBase::GetJitConstants(const convolution_grad_weights_params& cp) const {
JitConstants jit = training_kernel_base::GetJitConstants(cp);
const auto& padding = cp.padding;
const auto& input = cp.inputs[0];
int64_t input_offset_with_padding = (int64_t)input.GetFirstElementOffset() -
(cp.filterSize.x - 1 + padding.x) * input.X().pitch -
(cp.filterSize.y - 1 + padding.y) * input.Y().pitch;
input_offset_with_padding = std::max(input_offset_with_padding, (int64_t)0);
jit.AddConstants({
MakeJitConstant("STRIDE", cp.stride),
MakeJitConstant("PADDING", cp.padding),
MakeJitConstant("DILATION", cp.dilation),
MakeJitConstant("FILTER_ARRAY_NUM", cp.split),
MakeJitConstant("INPUT0_OFFSET_WITH_PADDING", input_offset_with_padding),
MakeJitConstant("DEPTHWISE_SEPARABLE_OPT", cp.depthwise_separable_opt),
MakeJitConstant("OUTPUT_GRAD_W", cp.output_grad_w),
});
return jit;
}
ConvolutionGradWeightsKernelBase::DispatchData ConvolutionGradWeightsKernelBase::SetDefault(
const convolution_grad_weights_params& params) const {
auto input_features = params.weights.IFM().v;
auto output_features = params.weights.OFM().v;
DispatchData kd;
kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
size_t gws0 = output_features * input_features;
size_t lws0 = std::min(gws0, static_cast<size_t>(32));
while (gws0 % lws0) {
lws0--;
}
kd.gws0 = gws0;
kd.gws1 = params.weights.X().v;
kd.gws2 = params.weights.Y().v;
kd.lws0 = lws0;
kd.lws1 = 1;
kd.lws2 = 1;
kd.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
return kd;
}
KernelsData ConvolutionGradWeightsKernelBase::GetKernelsData(const Params& params,
const optional_params& options) const {
assert(params.GetType() == KernelType::CONVOLUTION_GRAD_WEIGHTS);
if (!Validate(params, options)) {
return {};
}
const convolution_grad_weights_params& orgParams = static_cast<const convolution_grad_weights_params&>(params);
DispatchData runInfo = SetDefault(orgParams);
KernelData kd = KernelData::Default<convolution_grad_weights_params>(params);
convolution_grad_weights_params& newParams = *static_cast<convolution_grad_weights_params*>(kd.params.get());
bool succeed = UpdateWeightsParams(newParams, options, WeightsLayout::oiyx, kd.weightsReorderParams);
if (!succeed) {
return {};
}
auto cldnn_jit = GetJitConstants(orgParams);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel,
runInfo,
params.engineInfo,
kernelName,
jit,
entry_point,
DEFAULT,
true,
!orgParams.bias.empty());
if (newParams.use_momentum) {
kernel.arguments.push_back({ArgumentDescriptor::Types::PREV_WEIGHTS_GRADIENT, 0});
if (!newParams.bias.empty())
kernel.arguments.push_back({ArgumentDescriptor::Types::PREV_BIAS_GRADIENT, 0});
}
kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});
kernel.arguments.push_back({ArgumentDescriptor::Types::SPLIT, 0});
kernel.arguments.push_back({ArgumentDescriptor::Types::LEARNING_RATE, 0});
kd.estimatedTime = runInfo.efficiency;
return {kd};
}
} // namespace kernel_selector
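
To illustrate the INPUT0_OFFSET_WITH_PADDING value the deleted base class emits as a JIT constant: the first-element offset is shifted back by the filter and padding extents along X and Y and then clamped at zero. The pitch, padding, and filter numbers below are invented; only the formula and the clamp come from GetJitConstants() above.

// Standalone sketch of the padded input-offset computation (not library code).
#include <algorithm>
#include <cstdint>
#include <iostream>

int main() {
    const int64_t first_element_offset = 0;    // hypothetical tensor start offset
    const int64_t filter_x = 3, filter_y = 3;  // hypothetical 3x3 filter
    const int64_t pad_x = 1, pad_y = 1;        // hypothetical symmetric padding
    const int64_t x_pitch = 1, y_pitch = 16;   // hypothetical element pitches

    int64_t offset = first_element_offset
                   - (filter_x - 1 + pad_x) * x_pitch
                   - (filter_y - 1 + pad_y) * y_pitch;
    offset = std::max(offset, int64_t(0));     // never index before the buffer start

    std::cout << "INPUT0_OFFSET_WITH_PADDING = " << offset << "\n";  // prints 0 here
    return 0;
}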

View File

@ -1,79 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "training_kernel_base.h"
#include "kernel_selector_params.h"
#include <string>
namespace kernel_selector {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// convolution_grad_weights_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct convolution_grad_weights_params : public training_params {
convolution_grad_weights_params() : training_params(KernelType::CONVOLUTION_GRAD_WEIGHTS) {}
uSize filterSize;
uSize stride;
uSize dilation;
uSize padding;
uint32_t split = 1;
bool depthwise_separable_opt = false;
bool output_grad_w = false;
std::string to_string() const override;
ParamsKey GetParamsKey() const override {
ParamsKey k = training_params::GetParamsKey();
if (split > 1) {
k.EnableSplitSupport();
}
if (dilation.x != 1 || dilation.y != 1) {
k.EnableDilation();
}
if (depthwise_separable_opt) {
k.EnableDepthwiseSeparableOpt();
}
return k;
}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// convolution_grad_weights_optional_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct convolution_grad_weights_optional_params : training_optional_params {
convolution_grad_weights_optional_params() : training_optional_params(KernelType::CONVOLUTION_GRAD_WEIGHTS) {}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// ConvolutionGradWeightsKernelBase
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class ConvolutionGradWeightsKernelBase : public training_kernel_base {
public:
using training_kernel_base::training_kernel_base;
virtual ~ConvolutionGradWeightsKernelBase() {}
using DispatchData = CommonDispatchData;
protected:
virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const;
virtual JitConstants GetJitConstants(const convolution_grad_weights_params& params) const;
virtual DispatchData SetDefault(const convolution_grad_weights_params& params) const;
};
} // namespace kernel_selector

View File

@ -1,45 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "convolution_grad_weights_kernel_ref.h"
namespace kernel_selector {
ParamsKey ConvolutionGradWeightsKernelRef::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableInputWeightsType(WeightsType::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableInputLayout(DataLayout::yxfb);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableInputLayout(DataLayout::byxf);
k.EnableOutputLayout(DataLayout::yxfb);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::byxf);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBiasPerFeature();
k.EnableNonBiasTerm();
k.EnableMomentum();
k.EnableBatching();
k.EnableSplitSupport();
k.EnableGradient();
k.DisableTuning();
return k;
}
} // namespace kernel_selector

View File

@ -1,29 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "convolution_grad_weights_kernel_base.h"
namespace kernel_selector {
class ConvolutionGradWeightsKernelRef : public ConvolutionGradWeightsKernelBase {
public:
ConvolutionGradWeightsKernelRef() : ConvolutionGradWeightsKernelBase("convolution_grad_weights_ref") {}
virtual ~ConvolutionGradWeightsKernelRef() {}
ParamsKey GetSupportedKey() const override;
};
} // namespace kernel_selector

View File

@ -1,36 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "convolution_grad_weights_kernel_selector.h"
#include "convolution_grad_weights_kernel_ref.h"
#include "convolution_grad_weights_kernel_1x1.h"
#include "convolution_grad_weights_kernel_yxfb.h"
#include "convolution_grad_weights_kernel_3x3.h"
#include "convolution_grad_weights_kernel_7x7.h"
namespace kernel_selector {
convolution_grad_weights_kernel_selector::convolution_grad_weights_kernel_selector() {
Attach<ConvolutionGradWeightsKernelRef>();
Attach<ConvolutionGradWeightsKernel1x1>();
Attach<ConvolutionGradWeightsKernel_yxfb>();
Attach<ConvolutionGradWeightsKernel3x3>();
Attach<ConvolutionGradWeightsKernel7x7>();
}
KernelsData convolution_grad_weights_kernel_selector::GetBestKernels(const Params& params,
const optional_params& options) const {
return GetNaiveBestKernel(params, options, KernelType::CONVOLUTION_GRAD_WEIGHTS);
}
} // namespace kernel_selector

View File

@ -1,34 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "kernel_selector.h"
namespace kernel_selector {
class convolution_grad_weights_kernel_selector : public kernel_selector_base {
public:
static convolution_grad_weights_kernel_selector& Instance() {
static convolution_grad_weights_kernel_selector instance_;
return instance_;
}
convolution_grad_weights_kernel_selector();
virtual ~convolution_grad_weights_kernel_selector() {}
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};
} // namespace kernel_selector

View File

@ -1,74 +0,0 @@
/*
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "convolution_grad_weights_kernel_yxfb.h"
namespace kernel_selector {
ParamsKey ConvolutionGradWeightsKernel_yxfb::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F32);
k.EnableInputWeightsType(WeightsType::F32);
k.EnableOutputDataType(Datatype::F32);
k.EnableInputLayout(DataLayout::yxfb);
k.EnableOutputLayout(DataLayout::yxfb);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::byxf);
k.EnableSubGroup();
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBiasPerFeature();
k.EnableNonBiasTerm();
k.EnableMomentum();
k.EnableBatching();
k.EnableSplitSupport();
k.EnableGradient();
k.DisableTuning();
return k;
}
bool ConvolutionGradWeightsKernel_yxfb::Validate(const Params& p, const optional_params&) const {
const convolution_grad_weights_params& params = static_cast<const convolution_grad_weights_params&>(p);
auto batch = params.inputs[0].Batch().v;
if (batch % 16 != 0)
return false;
if (params.stride.x != 1 || params.stride.y != 1)
return false;
return true;
}
ConvolutionGradWeightsKernelBase::DispatchData ConvolutionGradWeightsKernel_yxfb::SetDefault(
const convolution_grad_weights_params& params) const {
auto input_features = params.weights.IFM().v;
auto output_features = params.weights.OFM().v;
auto x = params.weights.X().v;
auto y = params.weights.Y().v;
DispatchData kd;
kd.gws0 = 16;
kd.gws1 = input_features * output_features;
kd.gws2 = x * y;
kd.lws0 = 16;
kd.lws1 = 1;
kd.lws2 = 1;
kd.efficiency = FORCE_PRIORITY_7;
return kd;
}
} // namespace kernel_selector
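
A small standalone sketch of the checks that gated the deleted yxfb kernel above: it was only selected when the batch is a multiple of 16 and the stride is 1x1. ParamsSketch is a simplified stand-in for convolution_grad_weights_params.

// Standalone sketch of the yxfb grad-weights validation rules (not library code).
#include <cstddef>
#include <iostream>

struct ParamsSketch { size_t batch; size_t stride_x; size_t stride_y; };

static bool ValidateYxfb(const ParamsSketch& p) {
    if (p.batch % 16 != 0) return false;           // 16 batches handled per sub-group
    if (p.stride_x != 1 || p.stride_y != 1) return false;
    return true;
}

int main() {
    std::cout << ValidateYxfb({32, 1, 1}) << " " << ValidateYxfb({30, 1, 1}) << "\n";  // 1 0
    return 0;
}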

View File

@ -1,32 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "convolution_grad_weights_kernel_base.h"
namespace kernel_selector {
class ConvolutionGradWeightsKernel_yxfb : public ConvolutionGradWeightsKernelBase {
public:
ConvolutionGradWeightsKernel_yxfb() : ConvolutionGradWeightsKernelBase("convolution_grad_weights_yxfb") {}
virtual ~ConvolutionGradWeightsKernel_yxfb() {}
DispatchData SetDefault(const convolution_grad_weights_params& params) const override;
bool Validate(const Params& p, const optional_params& o) const override;
ParamsKey GetSupportedKey() const override;
};
} // namespace kernel_selector

View File

@ -35,7 +35,6 @@ ParamsKey DeconvolutionKernel_bfyx_opt::GetSupportedKey() const {
k.EnableBatching();
k.EnableSplitSupport();
k.EnableDepthwiseSeparableOpt();
- k.EnableGradient();
k.EnableGroupedConvolution();
return k;
}

View File

@ -55,7 +55,6 @@ ParamsKey DeconvolutionKernelRef::GetSupportedKey() const {
k.EnableBatching();
k.EnableSplitSupport();
k.EnableDepthwiseSeparableOpt();
- k.EnableGradient();
k.EnableGroupedConvolution();
k.EnableDifferentTypes();
k.EnableDifferentInputWeightsTypes();

View File

@ -222,9 +222,7 @@ KernelsData EltwiseKernel_b_fs_yx_fsv16::GetKernelsData(const Params& params, co
kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT);
kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(),
false,
- false,
- newParams.int8_quantization,
- newParams.output_calibration);
+ false);
kd.estimatedTime = runInfo.efficiency;

View File

@ -1,288 +0,0 @@
/*
// Copyright (c) 2019-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "eltwise_kernel_b_fs_yx_fsv4.h"
#include "kernel_selector_utils.h"
#include <string>
#include <vector>
namespace kernel_selector {
ParamsKey EltwiseKernel_b_fs_yx_fsv4::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableInputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableInputLayout(DataLayout::b_fs_yx_fsv4);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv4);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBatching();
k.EnableInt8Quantization();
k.EnableEltwiseStride();
return k;
}
EltwiseKernelBase::DispatchData EltwiseKernel_b_fs_yx_fsv4::SetDefault(const eltwise_params& params) const {
DispatchData kd;
// Because of the very specific requirements on the data, we can linearize it,
// i.e. use only one dimension, e.g. 'X'.
// GWS:
// we process 4*4 features per work item (4 int8 bytes per block_read4 read)
kd.gws0 = params.output.X().v * params.output.Y().v * params.output.Batch().v * params.output.Feature().v / (4 * 4);
kd.gws1 = 1;
kd.gws2 = 1;
// LWS:
kd.lws0 = 8;
kd.lws1 = 1;
kd.lws2 = 1;
kd.efficiency = FORCE_PRIORITY_1;
return kd;
}
bool EltwiseKernel_b_fs_yx_fsv4::Validate(const Params& params, const optional_params& options) const {
// Requirements for using the 'eltwise_b_fs_yx_fsv4' kernel are listed below:
// 1. No stride
// 2. All dimensions for all inputs are the same
// 3. No padding
// So, it can be linearized
if (!Parent::Validate(params, options)) {
return false;
}
KernelData kd = KernelData::Default<eltwise_params>(params);
eltwise_params& newParams = *static_cast<eltwise_params*>(kd.params.get());
// 1. No stride
if (!newParams.stride.empty()) {
return false;
}
for (size_t i = 0; i < newParams.inputs.size() - 1; i++) {
// 2. All dimensions for all inputs are the same
if (!(newParams.inputs[i] == newParams.inputs[i + 1])) {
return false;
}
}
const auto& in = newParams.inputs[0];
for (size_t i = 0; i < in.Dimentions(); i++) {
// 3. No padding
if ((in.GetDims()[i].pad.before != 0) || (in.GetDims()[i].pad.after != 0)) {
return false;
}
}
return true;
}
JitConstants EltwiseKernel_b_fs_yx_fsv4::GetJitConstants(const eltwise_params& params) const {
JitConstants jit = MakeBaseParamsJitConstants(params);
if (params.inputs[0].GetDType() == Datatype::UINT8) {
// Special handler for unsigned types
jit.AddConstants({MakeJitConstant("ELTW_UNSIGNED", 1)});
}
///////////////
jit.AddConstants({
MakeJitConstant("ELTWISE_LAYOUT_BASED", params.layoutBased),
MakeJitConstant("QUANTIZATION_TERM", params.int8_quantization),
});
if (params.int8_quantization) {
if (params.output_calibration) {
jit.AddConstant(MakeJitConstant("CALIBRATION_TERM", params.output_calibration));
jit.AddConstant(MakeJitConstant("O_QF", params.output_calibration_factors[0]));
} else {
jit.AddConstants({MakeJitConstant("O_QF", params.output_quantization_factor)});
}
}
std::string inputs_decls;
auto& updateInputs = params.updateInputIds;
for (size_t i = 0; i < params.inputs.size(); i++) {
// const should be added only to inputs which will not be updated
std::string const_str = "const";
for (size_t update_input_idx = 0; update_input_idx < updateInputs.size(); update_input_idx++) {
if (updateInputs[update_input_idx].inputId == i) {
const_str = "";
break;
}
}
inputs_decls +=
const_str + " __global " + toCLType(params.inputs[i].GetDType()) + "* input" + std::to_string(i) + ", ";
}
jit.AddConstant(MakeJitConstant("INPUTS_DECLS", inputs_decls));
jit.AddConstant(MakeJitConstant("ELTWISE_NO_PITCH_SAME_DIMS", CheckInputsOutputNoPitchSameDims(params)));
std::string do_eltwise;
auto& operations = params.operations;
auto& coefficients = params.coefficients;
for (size_t op_num = 0; op_num < operations.size(); op_num++) {
const std::string op_num_str = std::to_string(op_num);
const auto& ew = operations[op_num];
for (size_t input_idx = 0; input_idx < ew.inputs.size(); input_idx++) {
const auto& input = ew.inputs[input_idx];
const std::string name = "INPUT_" + op_num_str + "_" + std::to_string(input_idx);
switch (input.mode) {
case EltwiseInputMode::SCALAR:
jit.AddConstant(MakeJitConstant(name, input.scalar));
break;
case EltwiseInputMode::INPUT_BUFFER:
jit.AddConstant(MakeJitConstant(name,
"GET_INPUT(input" + std::to_string(input.index) + ", INPUT" +
std::to_string(input.index) + ")"));
break;
case EltwiseInputMode::OUTPUT_BUFFER:
jit.AddConstant(MakeJitConstant(name, "output[GET_INDEX(OUTPUT, )]"));
break;
case EltwiseInputMode::UNORDERED_ACCESS_INPUT_BUFFER:
jit.AddConstant(MakeJitConstant(
name,
"input" + std::to_string(input.index) + "[(size_t)tmp" + std::to_string(input.tmpIndex) + "]"));
break;
case EltwiseInputMode::INTERMEDIATE_RESULTS_INDEX:
jit.AddConstant(MakeJitConstant(name, "tmp" + std::to_string(input.tmpIndex)));
break;
default:
break;
}
}
std::string input0_str, input1_str, cast_type, op;
cast_type = "(int16)";
op = "const int16 tmp" + op_num_str + " = ";
input0_str = cast_type + "INPUT_" + op_num_str + "_0";
input1_str = cast_type + "INPUT_" + op_num_str + "_1";
if (ew.mode == EltwiseMode::ADD) {
std::vector<std::string> coeff_strings(ew.inputs.size(), "");
for (size_t input_idx = 0; input_idx < ew.inputs.size(); input_idx++) {
const auto& input = ew.inputs[input_idx];
if (input.mode == EltwiseInputMode::INPUT_BUFFER && input.index < coefficients.size()) {
const float c = coefficients[input.index];
if (c != 1.0f)
coeff_strings[input_idx] = cast_type + "(" + std::to_string(c) + ")*";
}
}
input0_str = coeff_strings[0] + input0_str;
input1_str = coeff_strings[1] + input1_str;
}
switch (ew.mode) {
case EltwiseMode::ADD:
op += input0_str + " + " + input1_str;
break;
case EltwiseMode::SUB:
op += input0_str + " - " + input1_str;
break;
case EltwiseMode::MUL:
op += input0_str + " * " + input1_str;
break;
case EltwiseMode::DIV:
op += input0_str + " / " + input1_str;
break;
case EltwiseMode::MODULU:
case EltwiseMode::MIN:
case EltwiseMode::MAX: {
auto mode = (ew.mode == EltwiseMode::MODULU ? "mod" : (ew.mode == EltwiseMode::MIN ? "min" : "max"));
auto input_0_type = params.inputs[0].GetDType();
auto input_1_type = params.inputs[1].GetDType();
// input_0 == int
if (input_0_type == kernel_selector::Datatype::INT8 ||
input_0_type == kernel_selector::Datatype::UINT8) {
// input_0 == int && input_1 == int
if (input_1_type == kernel_selector::Datatype::INT8 ||
input_1_type == kernel_selector::Datatype::UINT8) {
if (ew.mode == EltwiseMode::MODULU)
op += input0_str + " % " + input1_str;
else
op += cast_type + mode + "(" + input0_str + ", " + input1_str + ")";
// input_0 == int && input_1 != int
} else {
op += cast_type + "f" + mode + "(convert_float(" + input0_str + "), " + input1_str + ")";
}
// input_0 != int && input_1 == int
} else if (input_1_type == kernel_selector::Datatype::INT8 ||
input_1_type == kernel_selector::Datatype::UINT8) {
op += cast_type + "f" + mode + "(" + input0_str + ", convert_float(" + input1_str + "))";
// input_0 != int && input_1 != int
} else {
op += cast_type + "f" + mode + "(" + input0_str + ", " + input1_str + ")";
}
} break;
case EltwiseMode::POW:
op += cast_type + "pow(" + input0_str + ", " + input1_str + ")";
break;
case EltwiseMode::SQRT:
op += cast_type + "sqrt(" + input0_str + ")";
break;
case EltwiseMode::RSQRT:
op += cast_type + "1/sqrt(" + input0_str + ")";
break;
case EltwiseMode::ASSIGN:
op += input0_str;
break;
default:
break;
}
std::string opname = "OPERATION" + op_num_str;
jit.AddConstant(MakeJitConstant(opname, op));
do_eltwise += "\\\n\t" + opname + ";";
}
for (size_t update_input_idx = 0; update_input_idx < updateInputs.size(); update_input_idx++)
do_eltwise += "\\\n\tinput" + std::to_string(updateInputs[update_input_idx].inputId) + "[GET_INDEX(INPUT, " +
std::to_string(updateInputs[update_input_idx].inputId) + ")] = tmp" +
std::to_string(updateInputs[update_input_idx].tmpId) + ";";
do_eltwise += "\\\n\tres = tmp" + std::to_string(operations.size() - 1) + ";";
jit.AddConstant(MakeJitConstant("DO_ELTWISE", do_eltwise));
if (params.layoutBased || params.int8_quantization) {
jit.Merge(GetTensorFriendlyWorkGroupsJit(params.inputs[0]));
}
if (!params.stride.empty()) {
jit.AddConstant(MakeJitConstant("INPUT_STRIDED", 1));
}
///////////////
return jit;
}
KernelsData EltwiseKernel_b_fs_yx_fsv4::GetKernelsData(const Params& params, const optional_params& options) const {
return GetCommonKernelsData(params, options);
}
} // namespace kernel_selector
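
For reference, a standalone sketch of the JIT-string generation performed above: each eltwise operation becomes an OPERATION<n> macro and the macros are chained into DO_ELTWISE. Only the ADD/SUB/MUL arms are reproduced here, and the operand names are hard-coded stand-ins for the real per-input macros.

// Standalone sketch of OPERATION<n>/DO_ELTWISE macro-string assembly (not library code).
#include <iostream>
#include <string>
#include <vector>

enum class EltwiseMode { ADD, SUB, MUL };

int main() {
    const std::vector<EltwiseMode> operations = {EltwiseMode::ADD, EltwiseMode::MUL};
    std::string do_eltwise;

    for (size_t op_num = 0; op_num < operations.size(); op_num++) {
        const std::string n = std::to_string(op_num);
        // This layout always accumulates in int16 vectors.
        std::string op = "const int16 tmp" + n + " = ";
        const std::string in0 = "(int16)INPUT_" + n + "_0";
        const std::string in1 = "(int16)INPUT_" + n + "_1";
        switch (operations[op_num]) {
            case EltwiseMode::ADD: op += in0 + " + " + in1; break;
            case EltwiseMode::SUB: op += in0 + " - " + in1; break;
            case EltwiseMode::MUL: op += in0 + " * " + in1; break;
        }
        std::cout << "OPERATION" << n << " := " << op << "\n";
        do_eltwise += "\\\n\t" + std::string("OPERATION") + n + ";";
    }
    do_eltwise += "\\\n\tres = tmp" + std::to_string(operations.size() - 1) + ";";
    std::cout << "DO_ELTWISE :=" << do_eltwise << "\n";
    return 0;
}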

View File

@ -1,36 +0,0 @@
/*
// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "eltwise_kernel_base.h"
namespace kernel_selector {
class EltwiseKernel_b_fs_yx_fsv4 : public EltwiseKernelBase {
public:
using Parent = EltwiseKernelBase;
EltwiseKernel_b_fs_yx_fsv4() : EltwiseKernelBase("eltwise_b_fs_yx_fsv4") {}
virtual ~EltwiseKernel_b_fs_yx_fsv4() {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
protected:
bool Validate(const Params& params, const optional_params& options) const override;
JitConstants GetJitConstants(const eltwise_params& params) const override;
DispatchData SetDefault(const eltwise_params& params) const override;
};
} // namespace kernel_selector

View File

@ -51,17 +51,6 @@ static uint32_t GetNumberOfInputs(EltwiseMode m) {
ParamsKey eltwise_params::GetParamsKey() const {
ParamsKey k = base_params::GetParamsKey();
- if (int8_quantization) {
-     k.EnableInt8Quantization();
- }
- if (output_calibration) {
-     k.EnableOutputCalibration();
- }
- if (inputs_calibration) {
-     k.EnableEltwiseInputsCalibration();
- }
if (!stride.empty()) {
k.EnableEltwiseStride();
@ -617,9 +606,7 @@ KernelsData EltwiseKernelBase::GetCommonKernelsData(const Params& params, const
kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT);
kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(),
false,
- false,
- newParams.int8_quantization,
- newParams.output_calibration);
+ false);
kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;

View File

@ -84,14 +84,8 @@ struct eltwise_params : public base_params {
bool layoutBased = false;
bool int8_quantization = false;
- bool output_calibration = false;
- float output_quantization_factor = 1.0f;
- bool inputs_calibration = false;
bool broadcast = false;
- MultiDataTensor output_calibration_factors;
- MultiDataTensor inputs_calibration_factors;
- std::vector<float> input_quantization_factors;
virtual ParamsKey GetParamsKey() const;
};

View File

@ -1,301 +0,0 @@
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "eltwise_kernel_fs_bs_yx_bsv4_fsv32.h"
#include "kernel_selector_utils.h"
#include <string>
#include <vector>
namespace kernel_selector {
ParamsKey EltwiseKernel_fs_bs_yx_bsv4_fsv32::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBatching();
k.EnableInt8Quantization();
k.EnableEltwiseStride();
return k;
}
EltwiseKernelBase::DispatchData EltwiseKernel_fs_bs_yx_bsv4_fsv32::SetDefault(const eltwise_params& params) const {
DispatchData kd;
kd.gws0 = params.output.X().v;
kd.gws1 = params.output.Y().v;
// we process 4 batches and 4 features per workitem
kd.gws2 = (params.output.Batch().v / 4) * (params.output.Feature().v / 4);
kd.lws0 = 1;
kd.lws1 = 1;
kd.lws2 = 8;
kd.efficiency = FORCE_PRIORITY_3;
return kd;
}
JitConstants EltwiseKernel_fs_bs_yx_bsv4_fsv32::GetJitConstants(const eltwise_params& params) const {
JitConstants jit = MakeBaseParamsJitConstants(params);
const size_t in_x_pitch = 32 * 4;
const size_t in_y_pitch = 32 * 4 * params.inputs[0].X().LogicalDimPadded();
const size_t in_b_block_pitch = in_y_pitch * params.inputs[0].Y().LogicalDimPadded();
const size_t in_f_block_pitch = in_b_block_pitch * ((params.inputs[0].Batch().v + 3) / 4);
const size_t in_offset =
in_x_pitch * params.inputs[0].X().pad.before + in_y_pitch * params.inputs[0].Y().pad.before;
jit.AddConstant(MakeJitConstant("IN_X_PITCH", in_x_pitch));
jit.AddConstant(MakeJitConstant("IN_Y_PITCH", in_y_pitch));
jit.AddConstant(MakeJitConstant("IN_B_BLOCK_PITCH", in_b_block_pitch));
jit.AddConstant(MakeJitConstant("IN_F_BLOCK_PITCH", in_f_block_pitch));
jit.AddConstant(MakeJitConstant("IN_OFFSET", in_offset));
///////////////
jit.AddConstants({
MakeJitConstant("ELTWISE_LAYOUT_BASED", params.layoutBased),
MakeJitConstant("QUANTIZATION_TERM", params.int8_quantization),
});
if (params.int8_quantization) {
if (params.output_calibration) {
jit.AddConstant(MakeJitConstant("CALIBRATION_TERM", params.output_calibration));
jit.AddConstant(MakeJitConstant("O_QF", params.output_calibration_factors[0]));
} else {
jit.AddConstants({MakeJitConstant("O_QF", params.output_quantization_factor)});
}
}
std::string inputs_decls;
auto& updateInputs = params.updateInputIds;
for (size_t i = 0; i < params.inputs.size(); i++) {
// const should be added only to inputs which will not be updated
std::string const_str = "const";
for (size_t update_input_idx = 0; update_input_idx < updateInputs.size(); update_input_idx++) {
if (updateInputs[update_input_idx].inputId == i) {
const_str = "";
break;
}
}
inputs_decls +=
const_str + " __global " + toCLType(params.inputs[i].GetDType()) + "* input" + std::to_string(i) + ", ";
if (!params.stride.empty()) {
jit.AddConstant(MakeJitConstant("INPUT" + std::to_string(i) + "_STRIDE_X", params.stride[i].x));
jit.AddConstant(MakeJitConstant("INPUT" + std::to_string(i) + "_STRIDE_Y", params.stride[i].y));
}
}
jit.AddConstant(MakeJitConstant("INPUTS_DECLS", inputs_decls));
jit.AddConstant(MakeJitConstant("ELTWISE_NO_PITCH_SAME_DIMS", CheckInputsOutputNoPitchSameDims(params)));
std::string do_eltwise;
auto& operations = params.operations;
auto& coefficients = params.coefficients;
for (size_t op_num = 0; op_num < operations.size(); op_num++) {
const std::string op_num_str = std::to_string(op_num);
const auto& ew = operations[op_num];
for (size_t input_idx = 0; input_idx < ew.inputs.size(); input_idx++) {
const auto& input = ew.inputs[input_idx];
const std::string name = "INPUT_" + op_num_str + "_" + std::to_string(input_idx);
switch (input.mode) {
case EltwiseInputMode::SCALAR:
jit.AddConstant(MakeJitConstant(name, input.scalar));
break;
case EltwiseInputMode::INPUT_BUFFER:
jit.AddConstant(MakeJitConstant(name,
"GET_INPUT(input" + std::to_string(input.index) + ", INPUT" +
std::to_string(input.index) + ")"));
break;
case EltwiseInputMode::OUTPUT_BUFFER:
jit.AddConstant(MakeJitConstant(name, "output[GET_INDEX(OUTPUT, )]"));
break;
case EltwiseInputMode::UNORDERED_ACCESS_INPUT_BUFFER:
jit.AddConstant(MakeJitConstant(
name,
"input" + std::to_string(input.index) + "[(size_t)tmp" + std::to_string(input.tmpIndex) + "]"));
break;
case EltwiseInputMode::INTERMEDIATE_RESULTS_INDEX:
jit.AddConstant(MakeJitConstant(name, "tmp" + std::to_string(input.tmpIndex)));
break;
default:
break;
}
}
std::string input0_str, input1_str, cast_type, op;
if (params.int8_quantization) {
cast_type = "(int16)";
op = "const int16 tmp" + op_num_str + " = ";
} else {
cast_type = "(UNIT_TYPE)";
op = "const UNIT_TYPE tmp" + op_num_str + " = ";
}
input0_str = cast_type + "INPUT_" + op_num_str + "_0";
input1_str = cast_type + "INPUT_" + op_num_str + "_1";
if (ew.mode == EltwiseMode::ADD) {
std::vector<std::string> coeff_strings(ew.inputs.size(), "");
for (size_t input_idx = 0; input_idx < ew.inputs.size(); input_idx++) {
const auto& input = ew.inputs[input_idx];
if (input.mode == EltwiseInputMode::INPUT_BUFFER && input.index < coefficients.size()) {
const float c = coefficients[input.index];
if (c != 1.0f)
coeff_strings[input_idx] = cast_type + "(" + std::to_string(c) + ")*";
}
}
input0_str = coeff_strings[0] + input0_str;
input1_str = coeff_strings[1] + input1_str;
}
switch (ew.mode) {
case EltwiseMode::ADD:
op += input0_str + " + " + input1_str;
break;
case EltwiseMode::SUB:
op += input0_str + " - " + input1_str;
break;
case EltwiseMode::MUL:
op += input0_str + " * " + input1_str;
break;
case EltwiseMode::DIV:
op += input0_str + " / " + input1_str;
break;
case EltwiseMode::MODULU:
case EltwiseMode::MIN:
case EltwiseMode::MAX: {
auto mode = (ew.mode == EltwiseMode::MODULU ? "mod" : (ew.mode == EltwiseMode::MIN ? "min" : "max"));
auto input_0_type = params.inputs[0].GetDType();
auto input_1_type = params.inputs[1].GetDType();
// input_0 == int
if (input_0_type == kernel_selector::Datatype::INT8 ||
input_0_type == kernel_selector::Datatype::INT32 ||
input_0_type == kernel_selector::Datatype::INT64) {
// input_0 == int && input_1 == int
if (input_1_type == kernel_selector::Datatype::INT8 ||
input_1_type == kernel_selector::Datatype::INT32 ||
input_1_type == kernel_selector::Datatype::INT64) {
if (ew.mode == EltwiseMode::MODULU)
op += input0_str + " % " + input1_str;
else
op += cast_type + mode + "(" + input0_str + ", " + input1_str + ")";
// input_0 == int && input_1 != int
} else {
op += cast_type + "f" + mode + "(convert_float(" + input0_str + "), " + input1_str + ")";
}
// input_0 != int && input_1 == int
} else if (input_1_type == kernel_selector::Datatype::INT8 ||
input_1_type == kernel_selector::Datatype::INT32 ||
input_1_type == kernel_selector::Datatype::INT64) {
op += cast_type + "f" + mode + "(" + input0_str + ", convert_float(" + input1_str + "))";
// input_0 != int && input_1 != int
} else {
op += cast_type + "f" + mode + "(" + input0_str + ", " + input1_str + ")";
}
} break;
case EltwiseMode::POW:
op += cast_type + "pow(" + input0_str + ", " + input1_str + ")";
break;
case EltwiseMode::SQRT:
op += cast_type + "sqrt(" + input0_str + ")";
break;
case EltwiseMode::RSQRT:
op += cast_type + "1/sqrt(" + input0_str + ")";
break;
case EltwiseMode::SQUARED_DIFF:
op += cast_type + "((" + input0_str + " - " + input1_str +
")"
" * (" +
input0_str + " - " + input1_str + "))";
break;
case EltwiseMode::EQ:
op += cast_type + "(" + input0_str + " == " + input1_str + ")";
break;
case EltwiseMode::NE:
op += cast_type + "(" + input0_str + " != " + input1_str + ")";
break;
case EltwiseMode::LT:
op += cast_type + "(" + input0_str + " < " + input1_str + ")";
break;
case EltwiseMode::LE:
op += cast_type + "(" + input0_str + " <= " + input1_str + ")";
break;
case EltwiseMode::GT:
op += cast_type + "(" + input0_str + " > " + input1_str + ")";
break;
case EltwiseMode::GE:
op += cast_type + "(" + input0_str + " >= " + input1_str + ")";
break;
case EltwiseMode::LOGIC_AND:
op += cast_type + "(" + input0_str + " && " + input1_str + ")";
break;
case EltwiseMode::LOGIC_OR:
op += cast_type + "(" + input0_str + " || " + input1_str + ")";
break;
case EltwiseMode::LOGIC_XOR:
op += cast_type + "(!" + input0_str + " != !" + input1_str + ")";
break;
case EltwiseMode::FLOOR_MOD:
op += cast_type + "(" + input0_str + " - " + input0_str + " / " + input1_str + " * " + input1_str + ")";
break;
case EltwiseMode::ASSIGN:
op += input0_str;
break;
default:
break;
}
std::string opname = "OPERATION" + op_num_str;
jit.AddConstant(MakeJitConstant(opname, op));
do_eltwise += "\\\n\t" + opname + ";";
}
for (size_t update_input_idx = 0; update_input_idx < updateInputs.size(); update_input_idx++)
do_eltwise += "\\\n\tinput" + std::to_string(updateInputs[update_input_idx].inputId) + "[GET_INDEX(INPUT, " +
std::to_string(updateInputs[update_input_idx].inputId) + ")] = tmp" +
std::to_string(updateInputs[update_input_idx].tmpId) + ";";
do_eltwise += "\\\n\tres = tmp" + std::to_string(operations.size() - 1) + ";";
jit.AddConstant(MakeJitConstant("DO_ELTWISE", do_eltwise));
if (params.layoutBased || params.int8_quantization) {
jit.Merge(GetTensorFriendlyWorkGroupsJit(params.inputs[0]));
}
if (!params.stride.empty()) {
jit.AddConstant(MakeJitConstant("INPUT_STRIDED", 1));
}
///////////////
return jit;
}
KernelsData EltwiseKernel_fs_bs_yx_bsv4_fsv32::GetKernelsData(const Params& params,
const optional_params& options) const {
return GetCommonKernelsData(params, options);
}
} // namespace kernel_selector
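
To illustrate the pitch arithmetic the deleted bsv4_fsv32 kernel turned into JIT constants (32 features by 4 batches per block, ceil(Batch/4) batch blocks): the tensor extents and padding below are invented; the formulas are the ones in GetJitConstants() above.

// Standalone sketch of the fs_bs_yx_bsv4_fsv32 pitch computation (not library code).
#include <cstddef>
#include <iostream>

int main() {
    const size_t x_padded = 7, y_padded = 7;   // hypothetical padded spatial dims
    const size_t batch = 8;                    // hypothetical batch size
    const size_t pad_x_before = 0, pad_y_before = 0;

    const size_t in_x_pitch = 32 * 4;                                      // one x step = 32 features * 4 batches
    const size_t in_y_pitch = in_x_pitch * x_padded;                       // one y row
    const size_t in_b_block_pitch = in_y_pitch * y_padded;                 // one block of 4 batches
    const size_t in_f_block_pitch = in_b_block_pitch * ((batch + 3) / 4);  // one block of 32 features
    const size_t in_offset = in_x_pitch * pad_x_before + in_y_pitch * pad_y_before;

    std::cout << "IN_X_PITCH=" << in_x_pitch
              << " IN_Y_PITCH=" << in_y_pitch
              << " IN_B_BLOCK_PITCH=" << in_b_block_pitch
              << " IN_F_BLOCK_PITCH=" << in_f_block_pitch
              << " IN_OFFSET=" << in_offset << "\n";
    return 0;
}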

View File

@ -1,32 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "eltwise_kernel_base.h"
namespace kernel_selector {
class EltwiseKernel_fs_bs_yx_bsv4_fsv32 : public EltwiseKernelBase {
public:
EltwiseKernel_fs_bs_yx_bsv4_fsv32() : EltwiseKernelBase("eltwise_fs_bs_yx_bsv4_fsv32") {}
virtual ~EltwiseKernel_fs_bs_yx_bsv4_fsv32() {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
protected:
JitConstants GetJitConstants(const eltwise_params& params) const override;
DispatchData SetDefault(const eltwise_params& params) const override;
};
} // namespace kernel_selector

View File

@ -38,7 +38,6 @@ ParamsKey EltwiseKernelRef::GetSupportedKey() const {
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBatching();
- k.EnableInt8Quantization();
k.EnableEltwiseStride();
k.EnableEltwiseBroadcast();
return k;

View File

@ -16,8 +16,6 @@
#include "eltwise_kernel_selector.h"
#include "eltwise_kernel_ref.h"
#include "eltwise_kernel_vload8.h"
- #include "eltwise_kernel_fs_bs_yx_bsv4_fsv32.h"
- #include "eltwise_kernel_b_fs_yx_fsv4.h"
#include "eltwise_kernel_fs_b_yx_fsv32.h"
#include "eltwise_kernel_b_fs_yx_fsv16.h"
#include "eltwise_kernel_mixed_byxf_and_fs_b_yx_fsv32.h"
@ -26,8 +24,6 @@ namespace kernel_selector {
eltwise_kernel_selector::eltwise_kernel_selector() {
Attach<EltwiseKernelRef>();
Attach<EltwiseKernel_vload8>();
- Attach<EltwiseKernel_fs_bs_yx_bsv4_fsv32>();
- Attach<EltwiseKernel_b_fs_yx_fsv4>();
Attach<EltwiseKernel_fs_b_yx_fsv32>();
Attach<EltwiseKernel_mixed_byxf_and_fs_b_yx_fsv32>();
Attach<EltwiseKernel_b_fs_yx_fsv16>();

View File

@ -1,108 +0,0 @@
/*
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "embed_kernel_ref.h"
#include "kernel_selector_utils.h"
#include "common_tools.h"
#include <vector>
namespace kernel_selector {
ParamsKey EmbedKernelRef::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableInputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::INT8);
k.EnableInputWeightsType(WeightsType::F16);
k.EnableInputWeightsType(WeightsType::F32);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableAllInputLayout();
k.EnableOutputLayout(DataLayout::bf);
k.EnableBiasPerOutput();
k.EnableBiasPerFeature();
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBatching();
k.EnableNonBiasTerm();
return k;
}
JitConstants EmbedKernelRef::GetJitConstants(const embed_params& params) const {
JitConstants jit = WeightBiasKernelBase::GetJitConstants(params);
const auto& input = params.inputs[0];
const auto x_size = input.LogicalSize() / input.Batch().v;
const auto w_size = params.weights.OFM().v;
jit.AddConstant(MakeJitConstant("INPUT0_ELEMENTS_COUNT", x_size));
jit.AddConstant(MakeJitConstant("NUM_OUTPUT_SIZE", w_size));
return jit;
}
EmbedKernelRef::DispatchData EmbedKernelRef::SetDefault(const embed_params& params) const {
DispatchData kd;
std::vector<size_t> global = {params.inputs[0].X().v, params.weights.OFM().v, params.inputs[0].Batch().v};
std::vector<size_t> local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
kd.gws0 = global[0];
kd.gws1 = global[1];
kd.gws2 = global[2];
kd.lws0 = local[0];
kd.lws1 = local[1];
kd.lws2 = 1;
return kd;
}
KernelsData EmbedKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
assert(params.GetType() == KernelType::EMBED);
const embed_params& orgParams = static_cast<const embed_params&>(params);
DispatchData runInfo = SetDefault(orgParams);
KernelData kd = KernelData::Default<embed_params>(params);
embed_params& newParams = *static_cast<embed_params*>(kd.params.get());
bool succeed = UpdateWeightsParams(newParams, options, WeightsLayout::oiyx, kd.weightsReorderParams);
if (!succeed) {
return {};
}
auto cldnn_jit = GetJitConstants(newParams);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel,
runInfo,
params.engineInfo,
kernelName,
jit,
entry_point,
DEFAULT,
true,
!newParams.bias.empty());
kd.estimatedTime = runInfo.efficiency;
return {kd};
}
} // namespace kernel_selector
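
A brief standalone sketch of the sizes EmbedKernelRef derived above: the per-batch element count emitted as INPUT0_ELEMENTS_COUNT and the {X, OFM, Batch} global work size. The shapes below are invented values; the two formulas come from GetJitConstants() and SetDefault().

// Standalone sketch of the embed kernel's derived sizes (not library code).
#include <cstddef>
#include <iostream>

int main() {
    const size_t batch = 2, feature = 1, y = 1, x = 128;  // hypothetical input b/f/y/x
    const size_t weights_ofm = 256;                       // hypothetical embedding width

    const size_t logical_size = batch * feature * y * x;         // total element count
    const size_t input0_elements_count = logical_size / batch;   // per-batch count -> JIT constant
    const size_t gws[3] = {x, weights_ofm, batch};               // one work item per (x, output, batch)

    std::cout << "INPUT0_ELEMENTS_COUNT=" << input0_elements_count
              << "  NUM_OUTPUT_SIZE=" << weights_ofm
              << "  gws={" << gws[0] << "," << gws[1] << "," << gws[2] << "}\n";
    return 0;
}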

View File

@ -1,42 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "weight_bias_kernel_base.h"
#include "embed_params.h"
#include "common_kernel_base.h"
namespace kernel_selector {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// EmbedKernelRef
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class EmbedKernelRef : public WeightBiasKernelBase {
public:
EmbedKernelRef() : WeightBiasKernelBase("embed_ref") {}
virtual ~EmbedKernelRef() {}
struct DispatchData : public CommonDispatchData {};
ParamsKey GetSupportedKey() const override;
protected:
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
virtual JitConstants GetJitConstants(const embed_params& params) const;
virtual DispatchData SetDefault(const embed_params& params) const;
};
} // namespace kernel_selector

View File

@ -1,27 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "embed_kernel_selector.h"
#include "embed_kernel_ref.h"
namespace kernel_selector {
embed_kernel_selector::embed_kernel_selector() { Attach<EmbedKernelRef>(); }
KernelsData embed_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
return GetNaiveBestKernel(params, options, KernelType::EMBED);
}
} // namespace kernel_selector

View File

@ -1,35 +0,0 @@
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "kernel_selector.h"
namespace kernel_selector {
class embed_kernel_selector : public kernel_selector_base {
public:
static embed_kernel_selector& Instance() {
static embed_kernel_selector instance_;
return instance_;
}
embed_kernel_selector();
virtual ~embed_kernel_selector() {}
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};
} // namespace kernel_selector

View File

@ -1,51 +0,0 @@
/*
// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "weight_bias_params.h"
#include <string>
namespace kernel_selector {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// embed_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct embed_params : public weight_bias_params {
embed_params() : weight_bias_params(KernelType::EMBED) {}
std::string to_string() const {
std::stringstream s;
s << base_params::to_string() << "_";
if (bias.empty()) {
s << "no_bias"
<< "_";
} else {
s << "bias_" << bias[0].PhysicalSize() << "_";
}
return s.str();
}
virtual ParamsKey GetParamsKey() const { return weight_bias_params::GetParamsKey(); }
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// embed_optional_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct embed_optional_params : weight_bias_optional_params {
embed_optional_params() : weight_bias_optional_params(KernelType::EMBED) {}
};
} // namespace kernel_selector

View File

@ -29,8 +29,6 @@ JitConstants FullyConnectedKernelBase::GetJitConstants(const fully_connected_par
jit.AddConstant(MakeJitConstant("INPUT0_ELEMENTS_COUNT", x_size));
- jit.AddConstant(MakeJitConstant("QUANTIZATION_TERM", params.quantization != QuantizationType::NONE));
return jit;
}

View File

@ -1,122 +0,0 @@
// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fully_connected_kernel_mmad_batched.h"
namespace kernel_selector {
ParamsKey FullyConnected_mmad_batched::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
k.EnableOutputLayout(DataLayout::bf);
k.EnableBiasPerOutput();
k.EnableBiasPerFeature();
k.EnableNonBiasTerm();
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBatching();
k.EnableInt8Quantization();
k.EnableOutputCalibration();
return k;
}
bool FullyConnected_mmad_batched::Validate(const Params& p, const optional_params& o) const {
if (!FullyConnectedKernelBase::Validate(p, o)) {
return false;
}
const auto& params = static_cast<const fully_connected_params&>(p);
// we do not support padded input
if (params.inputs[0].X().pad.Total() != 0 || params.inputs[0].Y().pad.Total() != 0)
return false;
size_t batch = params.inputs[0].Batch().v;
// batch must be a multiple of 8
if (batch % 8 != 0) {
return false;
}
return true;
}
JitConstants FullyConnected_mmad_batched::GetJitConstants(const fully_connected_params& params,
const DispatchData& runInfo) const {
auto jit = Parent::GetJitConstants(params, runInfo);
jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws1));
// pitch for special block format used in this kernel
const size_t ifm_32_aligned = Align(params.weights.IFM().v, 32);
const size_t filter_ofm_block_pitch =
(ifm_32_aligned / 32) * params.weights.X().v * params.weights.Y().v * 4 * 8 * 8;
jit.AddConstant(MakeJitConstant("FILTER_OFM_BLOCK_PITCH", filter_ofm_block_pitch));
const size_t in_x_pitch = 32 * 4;
const size_t in_y_pitch = 32 * 4 * params.inputs[0].X().LogicalDimPadded();
const size_t in_b_block_pitch = in_y_pitch * params.inputs[0].Y().LogicalDimPadded();
const size_t in_f_block_pitch = in_b_block_pitch * ((params.inputs[0].Batch().v + 3) / 4);
const size_t in_offset =
in_x_pitch * params.inputs[0].X().pad.before + in_y_pitch * params.inputs[0].Y().pad.before;
jit.AddConstant(MakeJitConstant("IN_X_PITCH", in_x_pitch));
jit.AddConstant(MakeJitConstant("IN_Y_PITCH", in_y_pitch));
jit.AddConstant(MakeJitConstant("IN_B_BLOCK_PITCH", in_b_block_pitch));
jit.AddConstant(MakeJitConstant("IN_F_BLOCK_PITCH", in_f_block_pitch));
jit.AddConstant(MakeJitConstant("IN_OFFSET", in_offset));
return jit;
}
FullyConnected_mmad_batched::DispatchData FullyConnected_mmad_batched::SetDefault(const fully_connected_params& params,
int) const {
auto runInfo = Parent::SetDefault(params);
constexpr size_t sub_group_size = 8;
const auto of_maps = params.output.Feature().v;
const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size);
runInfo.gws0 = params.output.Batch().v / 8; // we process 8 batches in a single WG
runInfo.gws1 = of_threads_per_batch;
runInfo.gws2 = 1;
runInfo.lws0 = 1;
runInfo.lws1 = sub_group_size;
runInfo.lws2 = 1;
runInfo.efficiency = FORCE_PRIORITY_1;
return runInfo;
}
KernelsData FullyConnected_mmad_batched::GetKernelsData(const Params& params, const optional_params& options) const {
KernelsData res = {};
for (size_t i = 0; i < autoTuneOptions.size(); i++) {
KernelsData kd = GetTunedKernelsDataByIndex(params,
options,
DataLayout::fs_bs_yx_bsv4_fsv32,
WeightsLayout::os_is_yx_isa8_osv8_isv4,
FORCE_PRIORITY_1,
static_cast<int>(i));
if (!kd.empty()) {
res.emplace_back(kd[0]);
}
}
return res;
}
} // namespace kernel_selector
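
For reference, a standalone sketch of the block-pitch and dispatch math in the deleted batched MMAD kernel. RoundUp() is a simplified stand-in for the library helpers and the shape numbers are invented; the formulas mirror GetJitConstants() and SetDefault() above.

// Standalone sketch of the batched MMAD fully-connected sizing (not library code).
#include <cstddef>
#include <iostream>

static size_t RoundUp(size_t value, size_t multiple) {
    return ((value + multiple - 1) / multiple) * multiple;
}

int main() {
    const size_t ifm = 48, kx = 1, ky = 1;   // hypothetical weight shape
    const size_t ofm = 100;                  // hypothetical output features
    const size_t batch = 32;                 // batch must be a multiple of 8

    const size_t ifm_32_aligned = RoundUp(ifm, 32);  // pad IFM to the 32-wide block
    const size_t filter_ofm_block_pitch = (ifm_32_aligned / 32) * kx * ky * 4 * 8 * 8;

    const size_t sub_group_size = 8;
    const size_t gws0 = batch / 8;                    // 8 batches per work group
    const size_t gws1 = RoundUp(ofm, sub_group_size); // OFM threads per batch block

    std::cout << "FILTER_OFM_BLOCK_PITCH=" << filter_ofm_block_pitch
              << "  gws={" << gws0 << "," << gws1 << ",1}  lws={1," << sub_group_size << ",1}\n";
    return 0;
}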

View File

@ -1,36 +0,0 @@
// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fully_connected_kernel_base.h"
namespace kernel_selector {
class FullyConnected_mmad_batched : public FullyConnectedKernelBase {
public:
using Parent = FullyConnectedKernelBase;
FullyConnected_mmad_batched() : Parent("fully_connected_gpu_mmad_batched") {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
protected:
bool Validate(const Params& p, const optional_params& o) const override;
JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override;
DispatchData SetDefault(const fully_connected_params& params, int autoTuneIndex = -1) const override;
};
} // namespace kernel_selector

View File

@ -28,7 +28,6 @@
#include "fully_connected_kernel_fb_io_block.h"
#include "fully_connected_kernel_bf_io_input_spatial.h"
#include "fully_connected_kernel_mmad.h"
- #include "fully_connected_kernel_mmad_batched.h"
#include "fully_connected_kernel_imad.h"
#include "fully_connected_kernel_fs_byx_fsv32.h"
@ -49,7 +48,6 @@ fully_connected_kernel_selector::fully_connected_kernel_selector() {
Attach<FullyConnected_fb_io_b8_f8>();
Attach<FullyConnected_bf_io_input_spatial>();
Attach<FullyConnectedKernelMMAD>();
- // Attach<FullyConnected_mmad_batched>();
Attach<FullyConnectedKernelIMAD>();
Attach<FullyConnected_fs_byx_fsv32>();
}

View File

@ -1,82 +0,0 @@
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fully_connected_grad_input_kernel_base.h"
#include "kernel_selector_utils.h"
#include <vector>
#include <algorithm>
namespace kernel_selector {
JitConstants FullyConnectedGradInputKernelBase::GetJitConstants(const fully_connected_grad_input_params& params) const {
return WeightBiasKernelBase::GetJitConstants(params);
}
FullyConnectedGradInputKernelBase::DispatchData FullyConnectedGradInputKernelBase::SetDefault(
const fully_connected_grad_input_params& params) const {
DispatchData kd;
kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
size_t gws0 = params.output.Batch().v * params.weights.IFM().v;
size_t lws0 = std::min(gws0, static_cast<size_t>(32));
while (gws0 % lws0) {
lws0--;
}
kd.gws0 = gws0;
kd.gws1 = params.weights.X().v;
kd.gws2 = params.weights.Y().v;
kd.lws0 = lws0;
kd.lws1 = 1;
kd.lws2 = 1;
kd.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
return kd;
}
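The local-work-size loop above walks down from min(gws0, 32) until it finds an exact divisor, so lws0 always divides gws0. A small sketch of that search with a few checked cases (`pick_local_size` is an illustrative name, not part of the kernel base):

#include <algorithm>
#include <cassert>
#include <cstddef>

// Largest local size <= cap that evenly divides the global size,
// the same search the SetDefault above performs.
static std::size_t pick_local_size(std::size_t gws, std::size_t cap = 32) {
    std::size_t lws = std::min(gws, cap);
    while (gws % lws != 0) {
        --lws;
    }
    return lws;
}

int main() {
    assert(pick_local_size(96) == 32);   // 96 = 3 * 32
    assert(pick_local_size(50) == 25);   // 32..26 don't divide 50, 25 does
    assert(pick_local_size(7) == 7);     // gws smaller than the cap
}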
KernelsData FullyConnectedGradInputKernelBase::GetKernelsData(const Params& params,
const optional_params& options) const {
assert(params.GetType() == KernelType::FULLY_CONNECTED_GRAD_INPUT);
const fully_connected_grad_input_params& orgParams = static_cast<const fully_connected_grad_input_params&>(params);
DispatchData runInfo = SetDefault(orgParams);
KernelData kd = KernelData::Default<fully_connected_grad_input_params>(params);
fully_connected_grad_input_params& newParams = *static_cast<fully_connected_grad_input_params*>(kd.params.get());
bool succeed = UpdateWeightsParams(newParams, options, WeightsLayout::oi, kd.weightsReorderParams);
if (!succeed) {
return {};
}
auto cldnn_jit = GetJitConstants(orgParams);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel,
runInfo,
params.engineInfo,
kernelName,
jit,
entry_point,
DEFAULT,
true,
!orgParams.bias.empty());
kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});
kd.estimatedTime = runInfo.efficiency;
return {kd};
}
} // namespace kernel_selector

View File

@ -1,54 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "weight_bias_kernel_base.h"
#include "kernel_selector_params.h"
namespace kernel_selector {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// fully_connected_grad_input_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct fully_connected_grad_input_params : public weight_bias_params {
fully_connected_grad_input_params() : weight_bias_params(KernelType::FULLY_CONNECTED_GRAD_INPUT) {}
virtual ParamsKey GetParamsKey() const { return weight_bias_params::GetParamsKey(); }
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// fully_connected_grad_input_optional_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct fully_connected_grad_input_optional_params : weight_bias_optional_params {
fully_connected_grad_input_optional_params()
: weight_bias_optional_params(KernelType::FULLY_CONNECTED_GRAD_INPUT) {}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// FullyConnectedGradInputKernelBase
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class FullyConnectedGradInputKernelBase : public WeightBiasKernelBase {
public:
using WeightBiasKernelBase::WeightBiasKernelBase;
virtual ~FullyConnectedGradInputKernelBase() {}
using DispatchData = CommonDispatchData;
protected:
virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const;
virtual JitConstants GetJitConstants(const fully_connected_grad_input_params& params) const;
virtual DispatchData SetDefault(const fully_connected_grad_input_params& params) const;
};
} // namespace kernel_selector

View File

@ -1,44 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fully_connected_grad_input_kernel_ref.h"
namespace kernel_selector {
ParamsKey FullyConnectedGradInputKernelRef::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableInputWeightsType(WeightsType::F16);
k.EnableInputWeightsType(WeightsType::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableInputLayout(DataLayout::yxfb);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableInputLayout(DataLayout::byxf);
k.EnableOutputLayout(DataLayout::yxfb);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::byxf);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBiasPerFeature();
k.EnableNonBiasTerm();
// TODO: add support for batching; figure out a way to update weights/biases for multiple batches at the same time
k.EnableBatching();
k.EnableGradient();
k.DisableTuning();
return k;
}
} // namespace kernel_selector

View File

@ -1,29 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fully_connected_grad_input_kernel_base.h"
namespace kernel_selector {
class FullyConnectedGradInputKernelRef : public FullyConnectedGradInputKernelBase {
public:
FullyConnectedGradInputKernelRef() : FullyConnectedGradInputKernelBase("fully_connected_grad_input_gpu_ref") {}
virtual ~FullyConnectedGradInputKernelRef() {}
ParamsKey GetSupportedKey() const override;
};
} // namespace kernel_selector

View File

@ -1,28 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fully_connected_grad_input_kernel_selector.h"
#include "fully_connected_grad_input_kernel_ref.h"
namespace kernel_selector {
fully_connected_grad_input_kernel_selector::fully_connected_grad_input_kernel_selector() {
Attach<FullyConnectedGradInputKernelRef>();
}
KernelsData fully_connected_grad_input_kernel_selector::GetBestKernels(const Params& params,
const optional_params& options) const {
return GetNaiveBestKernel(params, options, KernelType::FULLY_CONNECTED_GRAD_INPUT);
}
} // namespace kernel_selector

View File

@ -1,34 +0,0 @@
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "kernel_selector.h"
namespace kernel_selector {
class fully_connected_grad_input_kernel_selector : public kernel_selector_base {
public:
static fully_connected_grad_input_kernel_selector& Instance() {
static fully_connected_grad_input_kernel_selector instance_;
return instance_;
}
fully_connected_grad_input_kernel_selector();
virtual ~fully_connected_grad_input_kernel_selector() {}
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};
} // namespace kernel_selector

View File

@ -1,93 +0,0 @@
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fully_connected_grad_weights_kernel_base.h"
#include "kernel_selector_utils.h"
#include <algorithm>
#include <vector>
namespace kernel_selector {
JitConstants FullyConnectedGradWeightsKernelBase::GetJitConstants(
const fully_connected_grad_weights_params& params) const {
JitConstants jit = training_kernel_base::GetJitConstants(params);
return jit;
}
FullyConnectedGradWeightsKernelBase::DispatchData FullyConnectedGradWeightsKernelBase::SetDefault(
const fully_connected_grad_weights_params& params) const {
DispatchData kd;
kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
size_t gws0 = params.weights.OFM().v * params.weights.IFM().v;
size_t lws0 = std::min(gws0, static_cast<size_t>(32));
while (gws0 % lws0) {
lws0--;
}
kd.gws0 = gws0;
kd.gws1 = params.weights.X().v;
kd.gws2 = params.weights.Y().v;
kd.lws0 = lws0;
kd.lws1 = 1;
kd.lws2 = 1;
kd.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
return kd;
}
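For a fully connected layer the weight tensor has 1x1 spatial dimensions, so the dispatch above reduces to one work-item per weight element. A worked example under assumed sizes (OFM=128, IFM=256, both illustrative):

#include <cstddef>

int main() {
    // Assumed fully connected layer: weights are OFM x IFM with 1x1 spatial dims.
    constexpr std::size_t ofm = 128, ifm = 256, x = 1, y = 1;

    constexpr std::size_t gws0 = ofm * ifm;   // one work-item per weight element
    constexpr std::size_t gws1 = x, gws2 = y;

    static_assert(gws0 == 32768, "one work-item per weight");
    // 32768 is a multiple of 32, so the divisor search above stops immediately at lws0 = 32.
    static_assert(gws0 % 32 == 0, "lws0 = 32");
    (void)gws1; (void)gws2;
}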
KernelsData FullyConnectedGradWeightsKernelBase::GetKernelsData(const Params& params,
const optional_params& options) const {
assert(params.GetType() == KernelType::FULLY_CONNECTED_GRAD_WEIGHTS);
const fully_connected_grad_weights_params& orgParams =
static_cast<const fully_connected_grad_weights_params&>(params);
DispatchData runInfo = SetDefault(orgParams);
KernelData kd = KernelData::Default<fully_connected_grad_weights_params>(params);
fully_connected_grad_weights_params& newParams =
*static_cast<fully_connected_grad_weights_params*>(kd.params.get());
bool succeed = UpdateWeightsParams(newParams, options, WeightsLayout::oi, kd.weightsReorderParams);
if (!succeed) {
return {};
}
auto cldnn_jit = GetJitConstants(orgParams);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel,
runInfo,
params.engineInfo,
kernelName,
jit,
entry_point,
DEFAULT,
true,
!orgParams.bias.empty());
if (orgParams.use_momentum) {
kernel.arguments.push_back({ArgumentDescriptor::Types::PREV_WEIGHTS_GRADIENT, 0});
if (!orgParams.bias.empty())
kernel.arguments.push_back({ArgumentDescriptor::Types::PREV_BIAS_GRADIENT, 0});
}
kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});
kernel.arguments.push_back({ArgumentDescriptor::Types::LEARNING_RATE, 0});
kd.estimatedTime = runInfo.efficiency;
return {kd};
}
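The trailing kernel arguments above depend on whether momentum is enabled. A minimal sketch of that conditional tail, using simplified stand-in types (`Arg`, `ArgType`, `append_tail`) in place of the real ArgumentDescriptor machinery:

#include <vector>

// Illustrative stand-ins for the ArgumentDescriptor types referenced above.
enum class ArgType { INPUT, PREV_WEIGHTS_GRADIENT, PREV_BIAS_GRADIENT, LEARNING_RATE };
struct Arg { ArgType type; int index; };

// Appends the trailing arguments the same way GetKernelsData does:
// optional previous gradients (momentum), the second input (incoming gradient),
// and the learning rate.
static void append_tail(std::vector<Arg>& args, bool use_momentum, bool has_bias) {
    if (use_momentum) {
        args.push_back({ArgType::PREV_WEIGHTS_GRADIENT, 0});
        if (has_bias)
            args.push_back({ArgType::PREV_BIAS_GRADIENT, 0});
    }
    args.push_back({ArgType::INPUT, 1});
    args.push_back({ArgType::LEARNING_RATE, 0});
}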
} // namespace kernel_selector

Some files were not shown because too many files have changed in this diff.