[IE CLDNN] Removed unused primitives and related structures (#1039)
This commit is contained in:
parent
66f620f97e
commit
c9d4e6b934
@ -30,7 +30,6 @@
|
||||
#include <api/detection_output.hpp>
|
||||
#include <api/normalize.hpp>
|
||||
#include <api/reshape.hpp>
|
||||
#include <api/batch_norm.hpp>
|
||||
#include <api/permute.hpp>
|
||||
#include <api/split.hpp>
|
||||
#include <api/resample.hpp>
|
||||
@ -1533,49 +1532,11 @@ void Program::CreateBatchNormalizationPrimitive(cldnn::topology& topology, Infer
|
||||
cldnn::primitive_id weightID = bnLayerName + "_" + m_scalesTag;
|
||||
cldnn::primitive_id biasID = bnLayerName + "_" + m_biasesTag;
|
||||
|
||||
#define _SCALE_BN_OPT
|
||||
#ifdef _SCALE_BN_OPT
|
||||
// Using scale as an optimization (1 mad instead of mad+rsq)
|
||||
// create new blobs for scale shift
|
||||
CreateScaleWeightsAndBiasesFromBN(topology, bnLayer, weightID, biasID);
|
||||
auto scalePrim = cldnn::scale(bnLayerName, inputPrimitives[0], weightID, biasID);
|
||||
|
||||
topology.add(scalePrim);
|
||||
#else
|
||||
cldnn::tensor blobTensor(0);
|
||||
const auto bnDims = bnLayer->outData[0]->getTensorDesc().getDims();
|
||||
switch (bnDims.size()) {
|
||||
case 2:
|
||||
blobTensor = cldnn::feature(TensorValue(bnDims[1]));
|
||||
break;
|
||||
case 4:
|
||||
blobTensor = cldnn::feature(TensorValue(bnDims[1]));
|
||||
break;
|
||||
default:
|
||||
THROW_CLDNN_EXCEPTION("Batch normalization input doesn't have 2 or 4 dimensions in " << bnLayer->name);
|
||||
}
|
||||
cldnn::layout blobLayout(
|
||||
DataTypeFromPrecision(layer->precision),
|
||||
m_defaultFormat,
|
||||
blobTensor);
|
||||
|
||||
// Create variance primitive
|
||||
cldnn::primitive_id varianceID = bnLayerName + "_" + m_weightsTag;
|
||||
varianceID = CreatePrimitiveFromBlob(topology, varianceID, bnLayer->_weights, blobLayout);
|
||||
|
||||
// Create mean primitive
|
||||
cldnn::primitive_id meanID = bnLayerName + "_" + m_biasesTag;
|
||||
meanID = CreatePrimitiveFromBlob(topology, meanID, bnLayer->_biases, blobLayout);
|
||||
|
||||
auto bnPrim = cldnn::batch_norm(
|
||||
bnLayerName,
|
||||
inputPrimitives[0],
|
||||
meanID,
|
||||
varianceID,
|
||||
bnLayer->epsilon);
|
||||
|
||||
topology.add(bnPrim);
|
||||
#endif // _SCALE_BN_OPT
|
||||
AddPrimitiveToProfiler(bnLayerName, layer);
|
||||
}
|
||||
|
||||
|
@ -1,22 +0,0 @@
|
||||
// Copyright (C) 2018-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
// dllmain.cpp : Defines the entry point for the DLL application.
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
|
||||
// DLL entry point called by the Windows loader.
// No work is done for any notification (process/thread attach or detach);
// the function always reports success.
BOOL APIENTRY DllMain(HMODULE hModule,
                      DWORD ul_reason_for_call,
                      LPVOID lpReserved) {
    // None of the arguments are needed because every reason code is handled the same way.
    (void)hModule;
    (void)ul_reason_for_call;
    (void)lpReserved;
    return TRUE;
}
|
||||
|
||||
#endif
|
@ -71,13 +71,6 @@ enum class activation_func {
|
||||
gelu // (0.5*val*(1 + erf(val / sqrt(2)))
|
||||
};
|
||||
|
||||
/// @brief Gradient functions selectable for the activation gradient primitive.
enum class activation_grad_func {
    /// val
    none,
    /// val * (input > 0)
    relu,
    /// val * ((input > 0) + a * (input <= 0)), where a is an additional param
    relu_negative_slope,
};
|
||||
|
||||
/// @brief activation additional params
|
||||
struct activation_additional_params {
|
||||
float a, b;
|
||||
|
@ -1,96 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#pragma once
|
||||
#include "primitive.hpp"
|
||||
#include "activation.hpp"
|
||||
#include <vector>
|
||||
|
||||
namespace cldnn {
|
||||
/// @addtogroup cpp_api C++ API
|
||||
/// @{
|
||||
/// @addtogroup cpp_topology Network Topology
|
||||
/// @{
|
||||
/// @addtogroup cpp_primitives Primitives
|
||||
/// @{
|
||||
|
||||
/// @brief Activation gradient for rectified linear unit or parameterized rectified linear unit.
|
||||
/// @par Algorithm:
|
||||
/// out(i,x,y) = input_gradient(i,x,y) * ((input(i,x,y) > 0) + slope(i) * (input(i,x,y) <= 0))
|
||||
/// @par Where:
|
||||
/// @li out(i,x,y) : value at x, y from i-th feature map after activation.
|
||||
/// @li in(i,x,y) : value at x, y from i-th feature map before activation.
|
||||
/// @li slope(i) : the slope value of the i-th feature map (can be shared across channels or one slope per channel).
|
||||
struct activation_grad : public primitive_base<activation_grad> {
    CLDNN_DECLARE_PRIMITIVE(activation_grad)

    /// @brief Constructs an activation gradient primitive with additional params passed by value.
    /// @param id This primitive id.
    /// @param input_grad Input gradient primitive id.
    /// @param input Input primitive id.
    /// @param activation_grad_function Activation gradient function to apply.
    /// @param additional_params Additional params (slope).
    /// @param output_padding Output padding forwarded to the primitive base.
    activation_grad(const primitive_id& id,
                    const primitive_id& input_grad,
                    const primitive_id& input,
                    activation_grad_func activation_grad_function,
                    activation_additional_params additional_params = {0.f, 0.f},
                    const padding& output_padding = padding())
        : primitive_base(id, {input_grad, input}, output_padding)
        , activation_grad_function(activation_grad_function)
        , additional_params(additional_params)
        , additional_params_input() {}  // no slope primitive in this form

    /// @brief Constructs an activation gradient primitive with additional params read from another primitive.
    /// @param id This primitive id.
    /// @param input_grad Input gradient primitive id.
    /// @param input Input primitive id.
    /// @param additional_params_input Id of the primitive holding the additional params (slope).
    /// @param activation_grad_function Activation gradient function to apply.
    /// @param output_padding Output padding forwarded to the primitive base.
    activation_grad(const primitive_id& id,
                    const primitive_id& input_grad,
                    const primitive_id& input,
                    const primitive_id& additional_params_input,
                    activation_grad_func activation_grad_function,
                    const padding& output_padding = padding())
        : primitive_base(id, {input_grad, input}, output_padding)
        , activation_grad_function(activation_grad_function)
        , additional_params({0.f, 0.f})  // by-value params are zeroed when a slope primitive is supplied
        , additional_params_input(additional_params_input) {}

    /// @brief Selected activation gradient function.
    activation_grad_func activation_grad_function;

    /// @brief Additional params passed by value.
    activation_additional_params additional_params;

    /// @brief PRelu activation slope input primitive id.
    /// Input x dimension should be equal to input feature size (one slope per channel).
    /// All other dimensions should be 1.
    primitive_id additional_params_input;

protected:
    /// @brief Lists the extra dependencies of this primitive.
    /// @return A vector holding @ref additional_params_input when it is set, otherwise an empty vector.
    std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
        std::vector<std::reference_wrapper<const primitive_id>> deps;
        if (!additional_params_input.empty())
            deps.push_back(additional_params_input);
        return deps;
    }
};
|
||||
/// @}
|
||||
/// @}
|
||||
/// @}
|
||||
} // namespace cldnn
|
111
inference-engine/thirdparty/clDNN/api/apply_adam.hpp
vendored
111
inference-engine/thirdparty/clDNN/api/apply_adam.hpp
vendored
@ -1,111 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#pragma once
|
||||
#include "primitive.hpp"
|
||||
#include <vector>
|
||||
|
||||
namespace cldnn {
|
||||
/// @addtogroup cpp_api C++ API
|
||||
/// @{
|
||||
/// @addtogroup cpp_topology Network Topology
|
||||
/// @{
|
||||
/// @addtogroup cpp_primitives Primitives
|
||||
/// @{
|
||||
|
||||
/// @brief Apply Adam primitive.
|
||||
/// @details Updates output using Adam algorithm. The output of this primitive should be mutable_data type in case user wants to update
|
||||
/// variable across the network. If output is not mutable_data then it will be initialized with 0.
|
||||
/// "Adam: A Method for Stochastic Optimization" by Diederik P. Kingma, Jimmy Ba
|
||||
/// @n See: https://arxiv.org/abs/1412.6980
|
||||
///
|
||||
/// <b>Algorithm:</b>
|
||||
/// @n float lr[t] = lr * sqrt(1 - beta2^t) / (1 - beta1^t);
|
||||
/// @n float m[t] = beta1 * m[t-1] + (1 - beta1) * grad[t];
|
||||
/// @n float v[t] = beta2 * v[t-1] + (1 - beta2) * grad[t] * grad[t];
|
||||
/// @n float result = result - lr[t] * m[t] / (sqrt(v[t]) + epsilon);
|
||||
|
||||
struct apply_adam : public primitive_base<apply_adam> {
    CLDNN_DECLARE_PRIMITIVE(apply_adam)

    /// @brief Constructs apply Adam primitive.
    /// @param id This primitive id.
    /// @param input Input gradient primitive id.
    /// @param m Primitive id containing mean data.
    /// @param v Primitive id containing variance.
    /// @param beta1_power Primitive id containing beta1^t.
    /// @param beta2_power Primitive id containing beta2^t.
    /// @param lr Learning rate parameter.
    /// @param beta1 Beta1 parameter.
    /// @param beta2 Beta2 parameter.
    /// @param epsilon Epsilon.
    /// @param dependency_id Optional primitive id that needs to complete before execution of this primitive.
    ///                      Used only for synchronization.
    /// @param output_padding Output padding forwarded to the primitive base.
    apply_adam(const primitive_id& id,
               const primitive_id& input,
               const primitive_id& m,
               const primitive_id& v,
               const primitive_id& beta1_power,
               const primitive_id& beta2_power,
               float lr,
               float beta1,
               float beta2,
               float epsilon,
               const primitive_id& dependency_id = "",
               const padding& output_padding = padding())
        : primitive_base(id, {input}, output_padding),
          m(m),
          v(v),
          beta1_power(beta1_power),
          beta2_power(beta2_power),
          lr(lr),
          beta1(beta1),
          beta2(beta2),
          epsilon(epsilon),
          dependency_id(dependency_id) {}

    /// @brief Primitive id containing m data.
    primitive_id m;
    /// @brief Primitive id containing v data.
    primitive_id v;
    /// @brief Primitive id containing beta1^t.
    primitive_id beta1_power;
    /// @brief Primitive id containing beta2^t.
    primitive_id beta2_power;
    /// @brief Learning rate parameter.
    float lr;
    /// @brief Beta1 parameter.
    float beta1;
    /// @brief Beta2 parameter.
    float beta2;
    /// @brief Epsilon.
    float epsilon;
    /// @brief Optional primitive id that needs to complete before execution of this primitive.
    ///        Used only for synchronization.
    primitive_id dependency_id;

protected:
    /// @brief Lists the extra dependencies of this primitive.
    /// @return m, v, beta1_power and beta2_power (in that order), followed by
    ///         dependency_id when it is not empty.
    std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
        std::vector<std::reference_wrapper<const primitive_id>> ret;
        // Reserve the real final size up front. The previous code called
        // reserve(0 or 1) on a vector that already held four elements, which
        // never reserved anything.
        ret.reserve(dependency_id.empty() ? 4u : 5u);
        ret.push_back(m);
        ret.push_back(v);
        ret.push_back(beta1_power);
        ret.push_back(beta2_power);
        if (!dependency_id.empty())
            ret.push_back(dependency_id);
        return ret;
    }
};
|
||||
/// @}
|
||||
/// @}
|
||||
/// @}
|
||||
} // namespace cldnn
|
184
inference-engine/thirdparty/clDNN/api/batch_norm.hpp
vendored
184
inference-engine/thirdparty/clDNN/api/batch_norm.hpp
vendored
@ -1,184 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2016 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#pragma once
|
||||
#include "primitive.hpp"
|
||||
#include <vector>
|
||||
|
||||
namespace cldnn {
|
||||
/// @addtogroup cpp_api C++ API
|
||||
/// @{
|
||||
/// @addtogroup cpp_topology Network Topology
|
||||
/// @{
|
||||
/// @addtogroup cpp_primitives Primitives
|
||||
/// @{
|
||||
|
||||
/// @brief Batch normalization primitive.
|
||||
/// @details Performs batch normalization as described in
|
||||
/// "Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift" by Ioffe, Szegedy
|
||||
/// @n See: http://arxiv.org/abs/1502.03167
|
||||
///
|
||||
/// <b>Algorithm:</b>
|
||||
/// @n global stats can be computed as:
|
||||
/// @n out[i] = ( (in[i] - mean[b]) / sqrt(variance[b] + epsilon) ) * scale[b] + shift[b]
|
||||
|
||||
struct batch_norm : public primitive_base<batch_norm> {
    CLDNN_DECLARE_PRIMITIVE(batch_norm)

    /// @brief Constructs batch normalization primitive from given mean and variance.
    /// @param id This primitive id.
    /// @param input Input primitive id.
    /// @param mean Primitive id containing mean data.
    /// @param variance Primitive id containing variance.
    /// @param epsilon Epsilon.
    /// @param output_padding Output padding forwarded to the primitive base.
    batch_norm(const primitive_id& id,
               const primitive_id& input,
               const primitive_id& mean,
               const primitive_id& variance,
               float epsilon,
               const padding& output_padding = padding())
        : primitive_base(id, {input}, output_padding)
        , mean(mean)
        , variance(variance)
        , inv_variance()
        , epsilon(epsilon) {}

    /// @brief Constructs batch normalization primitive from given mean, variance, scale and shift.
    /// @param id This primitive id.
    /// @param input Input primitive id.
    /// @param mean Primitive id containing mean data.
    /// @param variance Primitive id containing variance.
    /// @param scale Primitive id containing scale.
    /// @param shift Primitive id containing shift.
    /// @param epsilon Epsilon.
    /// @param output_padding Output padding forwarded to the primitive base.
    batch_norm(const primitive_id& id,
               const primitive_id& input,
               const primitive_id& mean,
               const primitive_id& variance,
               const primitive_id& scale,
               const primitive_id& shift,
               float epsilon,
               const padding& output_padding = padding())
        : primitive_base(id, {input}, output_padding)
        , mean(mean)
        , variance(variance)
        , scale(scale)
        , shift(shift)
        , inv_variance()
        , epsilon(epsilon) {}

    /// @brief Constructs batch normalization primitive with mean and variance calculation (used for training).
    /// @param id This primitive id.
    /// @param input Input primitive id.
    /// @param epsilon Epsilon.
    /// @param inv_variance Primitive id containing inverted variance calculated in this primitive.
    ///                     For inference leave empty.
    /// @param output_padding Output padding forwarded to the primitive base.
    batch_norm(const primitive_id& id,
               const primitive_id& input,
               float epsilon,
               const primitive_id& inv_variance = "",
               const padding& output_padding = padding())
        : primitive_base(id, {input}, output_padding)
        , mean()
        , variance()
        , inv_variance(inv_variance)
        , epsilon(epsilon) {}

    /// @brief Constructs batch normalization primitive with mean and variance calculation (used for training).
    /// @param id This primitive id.
    /// @param input Input primitive id.
    /// @param epsilon Epsilon.
    /// @param scale Primitive id containing scale.
    /// @param shift Primitive id containing shift.
    /// @param inv_variance Primitive id containing inverted variance calculated in this primitive.
    ///                     For inference leave empty.
    /// @param output_padding Output padding forwarded to the primitive base.
    batch_norm(const primitive_id& id,
               const primitive_id& input,
               float epsilon,
               const primitive_id& scale,
               const primitive_id& shift,
               const primitive_id& inv_variance = "",
               const padding& output_padding = padding())
        : primitive_base(id, {input}, output_padding)
        , mean()
        , variance()
        , scale(scale)
        , shift(shift)
        , inv_variance(inv_variance)
        , epsilon(epsilon) {}

    /// @brief Constructs batch normalization primitive with mean and variance calculation (used for training).
    /// @param id This primitive id.
    /// @param input Input primitive id.
    /// @param epsilon Epsilon.
    /// @param mean_out Primitive id containing mean output.
    /// @param variance_out Primitive id containing variance output.
    /// @param scale Primitive id containing scale.
    /// @param shift Primitive id containing shift.
    /// @param inv_variance Primitive id containing inverted variance calculated in this primitive.
    ///                     For inference leave empty.
    /// @param output_padding Output padding forwarded to the primitive base.
    batch_norm(const primitive_id& id,
               const primitive_id& input,
               float epsilon,
               const primitive_id& mean_out,
               const primitive_id& variance_out,
               const primitive_id& scale,
               const primitive_id& shift,
               const primitive_id& inv_variance = "",
               const padding& output_padding = padding())
        : primitive_base(id, {input}, output_padding)
        , mean(mean_out)
        , variance(variance_out)
        , scale(scale)
        , shift(shift)
        , inv_variance(inv_variance)
        , epsilon(epsilon) {}

    /// @brief Primitive id containing mean data.
    primitive_id mean;
    /// @brief Primitive id containing variance.
    primitive_id variance;
    /// @brief Primitive id containing scale.
    primitive_id scale;
    /// @brief Primitive id containing shift.
    primitive_id shift;
    /// @brief Primitive id containing inverted variance used in future gradient computing.
    primitive_id inv_variance;
    /// @brief Epsilon.
    float epsilon;

protected:
    /// @brief Lists the extra dependencies of this primitive.
    /// @return mean and variance when both are set, then scale and shift when
    ///         both are set, then inv_variance when it is set.
    std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
        // Each pair is only reported when both of its ids are present.
        const bool has_statistics = !(mean.empty() || variance.empty());
        const bool has_scale_shift = !(scale.empty() || shift.empty());

        std::vector<std::reference_wrapper<const primitive_id>> deps;
        if (has_statistics) {
            deps.push_back(mean);
            deps.push_back(variance);
        }
        if (has_scale_shift) {
            deps.push_back(scale);
            deps.push_back(shift);
        }
        if (!inv_variance.empty()) {
            deps.push_back(inv_variance);
        }
        return deps;
    }
};
|
||||
/// @}
|
||||
/// @}
|
||||
/// @}
|
||||
} // namespace cldnn
|
@ -1,61 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#pragma once
|
||||
#include "primitive.hpp"
|
||||
#include <vector>
|
||||
|
||||
namespace cldnn {
|
||||
/// @addtogroup cpp_api C++ API
|
||||
/// @{
|
||||
/// @addtogroup cpp_topology Network Topology
|
||||
/// @{
|
||||
/// @addtogroup cpp_primitives Primitives
|
||||
/// @{
|
||||
|
||||
/// @brief Performs backward batch normalization layer.
|
||||
/// @details Calculates mean gradient and gradient * input for every feature in data,
|
||||
/// then output is calculated as inv_variance * (input_grad - mean_grad_input * input - mean_grad)
|
||||
struct batch_norm_grad : public primitive_base<batch_norm_grad> {
    CLDNN_DECLARE_PRIMITIVE(batch_norm_grad)

    /// @brief Constructs batch normalization backward layer.
    /// @param id This primitive id.
    /// @param input_grad Input gradient primitive id.
    /// @param input Input primitive id.
    /// @param inv_variance Primitive id containing inverted variance from forward pass.
    /// @param output_padding Output padding forwarded to the primitive base.
    batch_norm_grad(const primitive_id& id,
                    const primitive_id& input_grad,
                    const primitive_id& input,
                    const primitive_id& inv_variance,
                    const padding& output_padding = padding())
        : primitive_base(id, {input_grad, input}, output_padding)
        , inv_variance(inv_variance) {}

    /// @brief Primitive id containing inverted variance from forward pass.
    primitive_id inv_variance;

protected:
    /// @brief Lists the extra dependencies of this primitive.
    /// @return A single-element vector referencing @ref inv_variance.
    std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
        std::vector<std::reference_wrapper<const primitive_id>> deps;
        deps.push_back(inv_variance);
        return deps;
    }
};
|
||||
/// @}
|
||||
/// @}
|
||||
/// @}
|
||||
} // namespace cldnn
|
@ -1,95 +0,0 @@
|
||||
// Copyright (c) 2019 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#pragma once
|
||||
#include "primitive.hpp"
|
||||
#include <vector>
|
||||
|
||||
namespace cldnn {
|
||||
/// @addtogroup cpp_api C++ API
|
||||
/// @{
|
||||
/// @addtogroup cpp_topology Network Topology
|
||||
/// @{
|
||||
/// @addtogroup cpp_primitives Primitives
|
||||
/// @{
|
||||
|
||||
/// @brief Reduction mode selector for the @ref contract layer.
enum class contract_mode : int32_t {
    sum,   ///< Sum reduction.
    prod,  ///< Product reduction.
    all,   ///< All reduction.
    any,   ///< Any reduction.
    max    ///< Max reduction.
};
|
||||
|
||||
/// @brief Reduces input with an operation defined by @p mode along defined
|
||||
/// by @p reduction_axes dimensions.
|
||||
///
|
||||
/// @details Reduces the input using the binary operation determined by
|
||||
/// @p mode. The @p reduction_axes determine the final shape of the
|
||||
/// output, which is calculated based on the input shape by
|
||||
/// collapsing the dimensions along which the reduction happens.
|
||||
/// For example, for the input with
|
||||
/// @n <tt>input_sizes = (in_b, in_f, in_y, in_x)</tt>
|
||||
/// @n a reduction with
|
||||
/// @n <tt>reduction_axes = (2)</tt>
|
||||
/// @n would collapse the Y dimension, producing
|
||||
/// @n <tt>output_shape = (1, in_b, in_f, in_x)</tt>
|
||||
/// @n where every element is a @p mode reduction of the input elements with
|
||||
/// @n the same B, F and X coordinates.
|
||||
/// @n
|
||||
/// @n@b Requirements:
|
||||
/// @n - @p reduction_axes size (dimensions count) must be within (inclusive) range
|
||||
/// 1 - 4.
|
||||
/// @n - @p reduction_axes mustn't have duplicate values.
|
||||
/// @n - Values of @p reduction_axes must be within (inclusive) range 0 - 3
|
||||
/// @n Breaking any of these conditions will raise an exception.
|
||||
struct contract : public primitive_base<contract> {
    CLDNN_DECLARE_PRIMITIVE(contract)

    /// @brief Constructs contract primitive / layer.
    ///
    /// @param id             An identifier of new primitive.
    /// @param input          An identifier of primitive which is an input for newly created
    ///                       contract primitive.
    /// @param mode           Reduction mode.
    /// @param reduction_axes Axes positions (0-based, from left to right) in input_shape
    ///                       that are being reduced.
    /// @param output_padding Optional padding for output from primitive.
    contract(const primitive_id& id,
             const primitive_id& input,
             contract_mode mode,
             const std::vector<uint16_t>& reduction_axes = {},
             const padding& output_padding = padding())
        : primitive_base(id, {input}, output_padding)
        , mode(mode)
        , reduction_axes(reduction_axes) {}

    /// @brief Contract (reduction) mode.
    contract_mode mode;

    /// @brief Array of axes positions from input shape (0-based, from left to right)
    ///        along which reduction should happen.
    std::vector<uint16_t> reduction_axes;
};
|
||||
/// @}
|
||||
/// @}
|
||||
/// @}
|
||||
} // namespace cldnn
|
@ -1,95 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#pragma once
|
||||
#include "deconvolution.hpp"
|
||||
#include "primitive.hpp"
|
||||
#include <vector>
|
||||
|
||||
namespace cldnn {
|
||||
/// @addtogroup cpp_api C++ API
|
||||
/// @{
|
||||
/// @addtogroup cpp_topology Network Topology
|
||||
/// @{
|
||||
/// @addtogroup cpp_primitives Primitives
|
||||
/// @{
|
||||
|
||||
/// @brief Performs backward convolution operation for input.
|
||||
/// @details convolution_grad_input is similar to deconvolution layer without biases and activation support.
|
||||
/// It actually uses deconvolution primitive underneath with gradient bool set to true.
|
||||
struct convolution_grad_input : public deconvolution {
|
||||
/// @brief Constructs convolution_grad_input primitive.
|
||||
/// @param id This primitive id.
|
||||
/// @param input Input primitive id.
|
||||
/// @param weights List of primitive ids containing weights data.
|
||||
/// @param input_offset Defines a shift, relative to (0,0) position of the input buffer,
|
||||
/// where (0,0) point of the convolution_grad_input window should start calculations.
|
||||
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
|
||||
/// @param with_activation Enables Relu activation.
|
||||
/// @param activation_slp Relu activation slope.
|
||||
convolution_grad_input(const primitive_id& id,
|
||||
const primitive_id& input,
|
||||
const std::vector<primitive_id>& weights,
|
||||
tensor stride = {1, 1, 1, 1},
|
||||
tensor input_offset = {0, 0, 0, 0},
|
||||
const padding& output_padding = padding())
|
||||
: deconvolution(id, input, {weights}, stride, input_offset, output_padding, true) {}
|
||||
|
||||
/// @brief Constructs convolution_grad_input primitive (computes input paddings to match output size).
|
||||
/// @param id This primitive id.
|
||||
/// @param input Input primitive id.
|
||||
/// @param weights List of primitive ids containing weights data.
|
||||
/// @param input_offset Defines a shift, relative to (0,0) position of the input buffer,
|
||||
/// where (0,0) point of the convolution_grad_input window should start calculations.
|
||||
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
|
||||
/// @param with_activation Enables Relu activation.
|
||||
/// @param activation_slp Relu activation slope.
|
||||
/// @param output_size User-defined output data size of the primitive (w/o padding).
|
||||
convolution_grad_input(const primitive_id& id,
|
||||
const primitive_id& input,
|
||||
const std::vector<primitive_id>& weights,
|
||||
tensor stride,
|
||||
tensor input_offset,
|
||||
tensor output_size,
|
||||
const padding& output_padding = padding())
|
||||
: deconvolution(id, input, {weights}, stride, input_offset, output_size, output_padding, true) {}
|
||||
|
||||
/// @brief Constructs convolution_grad_input primitive (computes input paddings to match output size).
|
||||
/// @param id This primitive id.
|
||||
/// @param input Input primitive id.
|
||||
/// @param weights List of primitive ids containing weights data.
|
||||
/// @param input_offset Defines a shift, relative to (0,0) position of the input buffer,
|
||||
/// where (0,0) point of the convolution_grad_input window should start calculations.
|
||||
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
|
||||
/// @param with_activation Enables Relu activation.
|
||||
/// @param activation_slp Relu activation slope.
|
||||
/// @param output_size User-defined output data size of the primitive (w/o padding).
|
||||
/// @return convolution_grad_input primitive with specified settings.
|
||||
static convolution_grad_input create_with_output_size(const primitive_id& id,
|
||||
const primitive_id& input,
|
||||
const std::vector<primitive_id>& weights,
|
||||
tensor output_size,
|
||||
tensor stride = {1, 1, 1, 1},
|
||||
tensor input_offset = {0, 0, 0, 0},
|
||||
const padding& output_padding = padding()) {
|
||||
return convolution_grad_input(id, input, weights, stride, input_offset, output_size, output_padding);
|
||||
}
|
||||
};
|
||||
/// @}
|
||||
/// @}
|
||||
/// @}
|
||||
} // namespace cldnn
|
@ -1,217 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#pragma once
|
||||
#include "primitive.hpp"
|
||||
#include <vector>
|
||||
|
||||
namespace cldnn {
|
||||
/// @addtogroup cpp_api C++ API
|
||||
/// @{
|
||||
/// @addtogroup cpp_topology Network Topology
|
||||
/// @{
|
||||
/// @addtogroup cpp_primitives Primitives
|
||||
/// @{
|
||||
|
||||
/// @brief Performs backward convolution operation for weights and biases.
|
||||
/// @details convolution_grad_weights updates weights and bias mutable data for training purposes.
|
||||
/// @details Please note that this primitive was not heavily tested and currently only batch=1 is enabled for this primitive.
|
||||
struct convolution_grad_weights
|
||||
: public primitive_base<convolution_grad_weights> {
|
||||
CLDNN_DECLARE_PRIMITIVE(convolution_grad_weights)
|
||||
|
||||
/// @brief Constructs convolution_grad_weights primitive.
|
||||
/// @param id This primitive id.
|
||||
/// @param input Input gradient primitive id.
|
||||
/// @param input Input primitive id from convolution forward pass.
|
||||
/// @param weights List of primitive ids containing weights data.
|
||||
/// @param bias List of primitive ids containing bias data. Provide empty vector if using next parameters without bias.
|
||||
/// @param input_offset Defines a shift, relative to (0,0) position of the input buffer,
|
||||
/// where (0,0) point of the convolution_grad_weights window should start calculations.
|
||||
/// @param dilation Defines dilation size.
|
||||
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
|
||||
/// @param conv_grad Id of primitive which uses weights and biases updated in this primitive.
|
||||
/// This is for correct order of calculating. Leave empty if primitive is last in backward pass.
|
||||
convolution_grad_weights(const primitive_id& id,
|
||||
const primitive_id& input_grad,
|
||||
const primitive_id& input,
|
||||
const std::vector<primitive_id>& weights,
|
||||
const std::vector<primitive_id>& bias,
|
||||
tensor stride = {1, 1, 1, 1},
|
||||
tensor input_offset = {0, 0, 0, 0},
|
||||
tensor dilation = {1, 1, 1, 1},
|
||||
const primitive_id& conv_grad = "",
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, {input_grad, input}, output_padding),
|
||||
conv_grad(conv_grad),
|
||||
stride(stride),
|
||||
input_offset(input_offset),
|
||||
dilation(dilation),
|
||||
output_grad_w(false),
|
||||
weights(weights),
|
||||
bias(bias),
|
||||
prev_weights_grad(std::vector<primitive_id>(0)),
|
||||
prev_bias_grad(std::vector<primitive_id>(0)) {}
|
||||
|
||||
/// @brief Constructs convolution_grad_weights primitive (w/o bias).
|
||||
/// @param id This primitive id.
|
||||
/// @param input Input gradient primitive id.
|
||||
/// @param input Input primitive id from convolution forward pass.
|
||||
/// @param weights List of primitive ids containing weights data.
|
||||
/// @param input_offset Defines a shift, relative to (0,0) position of the input buffer,
|
||||
/// where (0,0) point of the convolution_grad_weights window should start calculations.
|
||||
/// @param dilation Defines dilation size.
|
||||
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
|
||||
/// @param Should primitive give weights gradient (delta) as an output
|
||||
/// @param conv_grad Id of primitive which uses weights and biases updated in this primitive.
|
||||
/// This is for correct order of calculating. Leave empty if primitive is last in backward pass.
|
||||
convolution_grad_weights(const primitive_id& id,
|
||||
const primitive_id& input_grad,
|
||||
const primitive_id& input,
|
||||
const std::vector<primitive_id>& weights,
|
||||
tensor stride = {1, 1, 1, 1},
|
||||
tensor input_offset = {0, 0, 0, 0},
|
||||
tensor dilation = {1, 1, 1, 1},
|
||||
bool output_grad_w = false,
|
||||
const primitive_id& conv_grad = "",
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, {input_grad, input}, output_padding),
|
||||
conv_grad(conv_grad),
|
||||
stride(stride),
|
||||
input_offset(input_offset),
|
||||
dilation(dilation),
|
||||
output_grad_w(output_grad_w),
|
||||
weights(weights),
|
||||
bias(std::vector<primitive_id>(0)),
|
||||
prev_weights_grad(std::vector<primitive_id>(0)),
|
||||
prev_bias_grad(std::vector<primitive_id>(0)) {}
|
||||
|
||||
/// @brief Constructs convolution_grad_weights primitive (w/o bias).
|
||||
/// @param id This primitive id.
|
||||
/// @param input Input gradient primitive id.
|
||||
/// @param input Input primitive id from convolution forward pass.
|
||||
/// @param weights List of primitive ids containing weights data.
|
||||
/// @param input_offset Defines a shift, relative to (0,0) position of the input buffer,
|
||||
/// where (0,0) point of the convolution_grad_weights window should start calculations.
|
||||
/// @param dilation Defines dilation size.
|
||||
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
|
||||
/// @param conv_grad Id of primitive which uses weights and biases updated in this primitive.
|
||||
/// This is for correct order of calculating. Leave empty if primitive is last in backward pass.
|
||||
convolution_grad_weights(const primitive_id& id,
|
||||
const primitive_id& input_grad,
|
||||
const primitive_id& input,
|
||||
const std::vector<primitive_id>& weights,
|
||||
tensor stride,
|
||||
tensor input_offset,
|
||||
tensor dilation,
|
||||
const primitive_id& conv_grad = "",
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, {input_grad, input}, output_padding),
|
||||
conv_grad(conv_grad),
|
||||
stride(stride),
|
||||
input_offset(input_offset),
|
||||
dilation(dilation),
|
||||
output_grad_w(false),
|
||||
weights(weights),
|
||||
bias(std::vector<primitive_id>(0)),
|
||||
prev_weights_grad(std::vector<primitive_id>(0)),
|
||||
prev_bias_grad(std::vector<primitive_id>(0)) {}
|
||||
|
||||
/// @brief Constructs convolution_grad_weights primitive with momentum optimizer.
|
||||
/// @param id This primitive id.
|
||||
/// @param input Input gradient primitive id.
|
||||
/// @param input Input primitive id from convolution forward pass.
|
||||
/// @param weights List of primitive ids containing weights data.
|
||||
/// @param bias List of primitive ids containing bias data. Provide empty vector if using next parameters without bias.
|
||||
/// @param prev_weights_grad List of primitive ids which contains weights gradient data calculated in previous iteration. Used in momentum optimizer.
|
||||
/// @param prev_bias_grad List of primitive ids which contains bias gradient data calculated in previous iteration. Used in momentum optimizer.
|
||||
/// @param input_offset Defines a shift, relative to (0,0) position of the input buffer,
|
||||
/// where (0,0) point of the convolution_grad_weights window should start calculations.
|
||||
/// @param dilation Defines dilation size.
|
||||
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
|
||||
/// @param conv_grad Id of primitive which uses weights and biases updated in this primitive.
|
||||
/// This is for correct order of calculating. Leave empty if primitive is last in backward pass.
|
||||
convolution_grad_weights(const primitive_id& id,
|
||||
const primitive_id& input_grad,
|
||||
const primitive_id& input,
|
||||
const std::vector<primitive_id>& weights,
|
||||
const std::vector<primitive_id>& bias,
|
||||
const std::vector<primitive_id>& prev_weights_grad,
|
||||
const std::vector<primitive_id>& prev_bias_grad,
|
||||
tensor stride = {1, 1, 1, 1},
|
||||
tensor input_offset = {0, 0, 0, 0},
|
||||
tensor dilation = {1, 1, 1, 1},
|
||||
const primitive_id& conv_grad = "",
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, {input_grad, input}, output_padding),
|
||||
conv_grad(conv_grad),
|
||||
stride(stride),
|
||||
input_offset(input_offset),
|
||||
dilation(dilation),
|
||||
output_grad_w(false),
|
||||
weights(weights),
|
||||
bias(bias),
|
||||
prev_weights_grad(prev_weights_grad),
|
||||
prev_bias_grad(prev_bias_grad) {}
|
||||
|
||||
/// @brief Primitive id containing convolution gradient data.
|
||||
primitive_id conv_grad;
|
||||
/// @brief Defines shift in input buffer between adjacent calculations of output values.
|
||||
tensor stride;
|
||||
/// @brief Defines a shift, relative to (0,0) position of the input buffer,
|
||||
/// where (0,0) point of the convolution_grad_weights window should start calculations.
|
||||
tensor input_offset;
|
||||
/// @brief Defines gaps in the input - dilation rate k=1 is normal convolution, k=2 means skipping one pixel per input, k=4 means skipping 3 pixels.
|
||||
/// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1.
|
||||
/// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4].
|
||||
tensor dilation;
|
||||
/// @brief Should primitive give weights gradient (delta) as an output
|
||||
bool output_grad_w;
|
||||
/// @brief List of primitive ids containing weights data.
|
||||
const primitive_id_arr weights;
|
||||
/// @brief List of primitive ids containing bias data.
|
||||
const primitive_id_arr bias;
|
||||
/// @brief Array of primitive ids containing weights gradient data calculated in previous iteration.
|
||||
/// Amount of primitives and their memory sizes should be same as weights.
|
||||
const primitive_id_arr prev_weights_grad;
|
||||
/// @brief Array of primitive ids containing bias gradient data calculated in previous iteration.
|
||||
/// Amount of primitives and their memory sizes should be same as biases.
|
||||
const primitive_id_arr prev_bias_grad;
|
||||
|
||||
/// @brief On how many cards split the computation to.
|
||||
int32_t split() const { return static_cast<int32_t>(weights.size()); }
|
||||
|
||||
protected:
|
||||
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
|
||||
std::vector<std::reference_wrapper<const primitive_id>> ret;
|
||||
ret.reserve(weights.size() + bias.size() + !conv_grad.empty() + prev_weights_grad.size() +
|
||||
prev_bias_grad.size());
|
||||
for (auto& w : weights) ret.push_back(std::ref(w));
|
||||
for (auto& b : bias) ret.push_back(std::ref(b));
|
||||
|
||||
for (auto& g : prev_weights_grad) ret.push_back(std::ref(g));
|
||||
for (auto& g : prev_bias_grad) ret.push_back(std::ref(g));
|
||||
if (!conv_grad.empty())
|
||||
ret.push_back(conv_grad);
|
||||
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
/// @}
|
||||
/// @}
|
||||
/// @}
|
||||
} // namespace cldnn
|
@ -56,8 +56,7 @@ struct deconvolution : public primitive_base<deconvolution> {
|
||||
with_output_size(false),
|
||||
groups(1),
|
||||
weights(weights),
|
||||
bias(bias),
|
||||
_gradient(false) {}
|
||||
bias(bias) {}
|
||||
/// @brief Constructs deconvolution primitive.
|
||||
/// @param id This primitive id.
|
||||
/// @param input Input primitive id.
|
||||
@ -83,8 +82,7 @@ struct deconvolution : public primitive_base<deconvolution> {
|
||||
with_output_size(false),
|
||||
groups(groups),
|
||||
weights(weights),
|
||||
bias(bias),
|
||||
_gradient(false) {}
|
||||
bias(bias) {}
|
||||
|
||||
/// @brief Constructs deconvolution primitive (w/o bias).
|
||||
/// @param id This primitive id.
|
||||
@ -100,16 +98,14 @@ struct deconvolution : public primitive_base<deconvolution> {
|
||||
const std::vector<primitive_id>& weights,
|
||||
tensor stride = {1, 1, 1, 1},
|
||||
tensor input_offset = {0, 0, 0, 0},
|
||||
const padding& output_padding = padding(),
|
||||
bool gradient = false)
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, {input}, output_padding),
|
||||
input_offset(input_offset),
|
||||
stride(stride),
|
||||
with_output_size(false),
|
||||
groups(1),
|
||||
weights(weights),
|
||||
bias(std::vector<primitive_id>(0)),
|
||||
_gradient(gradient) {}
|
||||
bias(std::vector<primitive_id>(0)) {}
|
||||
|
||||
/// @brief Constructs deconvolution primitive (w/o bias).
|
||||
/// @param id This primitive id.
|
||||
@ -127,16 +123,14 @@ struct deconvolution : public primitive_base<deconvolution> {
|
||||
uint32_t groups,
|
||||
tensor stride = {1, 1, 1, 1},
|
||||
tensor input_offset = {0, 0, 0, 0},
|
||||
const padding& output_padding = padding(),
|
||||
bool gradient = false)
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, {input}, output_padding),
|
||||
input_offset(input_offset),
|
||||
stride(stride),
|
||||
with_output_size(false),
|
||||
groups(groups),
|
||||
weights(weights),
|
||||
bias(std::vector<primitive_id>(0)),
|
||||
_gradient(gradient) {}
|
||||
bias(std::vector<primitive_id>(0)) {}
|
||||
|
||||
/// @brief Constructs deconvolution primitive (computes input paddings to match output size).
|
||||
/// @param id This primitive id.
|
||||
@ -164,8 +158,7 @@ struct deconvolution : public primitive_base<deconvolution> {
|
||||
output_size(output_size),
|
||||
groups(1),
|
||||
weights(weights),
|
||||
bias(bias),
|
||||
_gradient(false) {}
|
||||
bias(bias) {}
|
||||
|
||||
/// @brief Constructs deconvolution primitive (computes input paddings to match output size).
|
||||
/// @param id This primitive id.
|
||||
@ -195,8 +188,7 @@ struct deconvolution : public primitive_base<deconvolution> {
|
||||
output_size(output_size),
|
||||
groups(groups),
|
||||
weights(weights),
|
||||
bias(bias),
|
||||
_gradient(false) {}
|
||||
bias(bias) {}
|
||||
|
||||
/// @brief Constructs deconvolution primitive (w/o bias, computes input paddings to match output size).
|
||||
/// @param id This primitive id.
|
||||
@ -214,8 +206,7 @@ struct deconvolution : public primitive_base<deconvolution> {
|
||||
tensor stride,
|
||||
tensor input_offset,
|
||||
tensor output_size,
|
||||
const padding& output_padding = padding(),
|
||||
bool gradient = false)
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, {input}, output_padding),
|
||||
input_offset(input_offset),
|
||||
stride(stride),
|
||||
@ -223,8 +214,7 @@ struct deconvolution : public primitive_base<deconvolution> {
|
||||
output_size(output_size),
|
||||
groups(1),
|
||||
weights(weights),
|
||||
bias(std::vector<primitive_id>(0)),
|
||||
_gradient(gradient) {}
|
||||
bias(std::vector<primitive_id>(0)) {}
|
||||
|
||||
/// @brief Constructs deconvolution primitive (computes input paddings to match output size).
|
||||
/// @param id This primitive id.
|
||||
@ -300,12 +290,8 @@ struct deconvolution : public primitive_base<deconvolution> {
|
||||
|
||||
/// @brief On how many cards split the computation to.
|
||||
int32_t split() const { return static_cast<int32_t>(weights.size()); }
|
||||
/// @brief Indicates that deconvolution is used for convolution backward computation (convolution_grad_input)
|
||||
bool gradient() const { return _gradient; }
|
||||
|
||||
protected:
|
||||
bool _gradient;
|
||||
|
||||
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
|
||||
std::vector<std::reference_wrapper<const primitive_id>> ret;
|
||||
ret.reserve(weights.size() + bias.size());
|
||||
|
@ -92,13 +92,9 @@ struct eltwise : public primitive_base<eltwise> {
|
||||
eltwise_mode mode,
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, {input, input2}, output_padding),
|
||||
output_calibration_factors(""),
|
||||
output_quantization_factor(1.0f),
|
||||
input_quantization_factors(0),
|
||||
mode(mode),
|
||||
coefficients(std::vector<float>(0)),
|
||||
stride(std::vector<tensor>(0)),
|
||||
inputs_calibration_factors(std::vector<primitive_id>(0)) {}
|
||||
stride(std::vector<tensor>(0)) {}
|
||||
|
||||
/// @brief Constructs eltwise primitive.
|
||||
/// @param id This primitive id.
|
||||
@ -115,13 +111,9 @@ struct eltwise : public primitive_base<eltwise> {
|
||||
eltwise_mode mode,
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, {input, input2}, output_padding),
|
||||
output_calibration_factors(""),
|
||||
output_quantization_factor(1.0f),
|
||||
input_quantization_factors(0),
|
||||
mode(mode),
|
||||
coefficients(std::vector<float>(0)),
|
||||
stride(stride),
|
||||
inputs_calibration_factors(std::vector<primitive_id>(0)) {}
|
||||
stride(stride) {}
|
||||
|
||||
/// @brief Constructs eltwise primitive.
|
||||
/// @param id This primitive id.
|
||||
@ -134,13 +126,9 @@ struct eltwise : public primitive_base<eltwise> {
|
||||
data_types data_type,
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, inputs, output_padding, optional_data_type{data_type}),
|
||||
output_calibration_factors(""),
|
||||
output_quantization_factor(1.0f),
|
||||
input_quantization_factors(0),
|
||||
mode(mode),
|
||||
coefficients(std::vector<float>(0)),
|
||||
stride(std::vector<tensor>(0)),
|
||||
inputs_calibration_factors(std::vector<primitive_id>(0)) {}
|
||||
stride(std::vector<tensor>(0)) {}
|
||||
|
||||
/// @brief Constructs eltwise primitive.
|
||||
/// @param id This primitive id.
|
||||
@ -151,13 +139,9 @@ struct eltwise : public primitive_base<eltwise> {
|
||||
eltwise_mode mode,
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, inputs, output_padding),
|
||||
output_calibration_factors(""),
|
||||
output_quantization_factor(1.0f),
|
||||
input_quantization_factors(0),
|
||||
mode(mode),
|
||||
coefficients(std::vector<float>(0)),
|
||||
stride(std::vector<tensor>(0)),
|
||||
inputs_calibration_factors(std::vector<primitive_id>(0)) {}
|
||||
stride(std::vector<tensor>(0)) {}
|
||||
|
||||
/// @brief Constructs eltwise primitive.
|
||||
/// @param id This primitive id.
|
||||
@ -171,13 +155,9 @@ struct eltwise : public primitive_base<eltwise> {
|
||||
data_types data_type,
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, inputs, output_padding, optional_data_type{data_type}),
|
||||
output_calibration_factors(""),
|
||||
output_quantization_factor(1.0f),
|
||||
input_quantization_factors(0),
|
||||
mode(mode),
|
||||
coefficients(coefficients),
|
||||
stride(std::vector<tensor>(0)),
|
||||
inputs_calibration_factors(std::vector<primitive_id>(0)) {
|
||||
stride(std::vector<tensor>(0)) {
|
||||
if (mode == eltwise_mode::sum && !coefficients.empty() && coefficients.size() != inputs.size()) {
|
||||
throw std::invalid_argument("Invalid eltwise sum coefficients count (should be equal to 0 or input.size)");
|
||||
}
|
||||
@ -186,31 +166,12 @@ struct eltwise : public primitive_base<eltwise> {
|
||||
}
|
||||
}
|
||||
|
||||
/// @brief Primitive id containing output quantization factors per output feature map.
|
||||
primitive_id output_calibration_factors;
|
||||
/// @brief Output quantization factor
|
||||
float output_quantization_factor;
|
||||
/// @brief List of quantization factors per input.
|
||||
std::vector<float> input_quantization_factors;
|
||||
/// @param mode Eltwise mode.
|
||||
eltwise_mode mode;
|
||||
/// @param coefficients Blob-wise coefficient for SUM operation.
|
||||
std::vector<float> coefficients;
|
||||
/// @brief Defines shift in input buffers between adjacent calculations of output values.
|
||||
std::vector<tensor> stride;
|
||||
/// @brief List of primitive ids containing input quantization factors per feature map, one primitive id for each input.
|
||||
const primitive_id_arr inputs_calibration_factors;
|
||||
|
||||
protected:
|
||||
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
|
||||
std::vector<std::reference_wrapper<const primitive_id>> ret;
|
||||
if (!output_calibration_factors.empty())
|
||||
ret.push_back(output_calibration_factors);
|
||||
|
||||
for (auto& icf : inputs_calibration_factors) ret.push_back(std::ref(icf));
|
||||
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
/// @}
|
||||
/// @}
|
||||
|
79
inference-engine/thirdparty/clDNN/api/embed.hpp
vendored
79
inference-engine/thirdparty/clDNN/api/embed.hpp
vendored
@ -1,79 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#pragma once
|
||||
#include "primitive.hpp"
|
||||
#include <vector>
|
||||
|
||||
namespace cldnn {
|
||||
/// @addtogroup cpp_api C++ API
|
||||
/// @{
|
||||
/// @addtogroup cpp_topology Network Topology
|
||||
/// @{
|
||||
/// @addtogroup cpp_primitives Primitives
|
||||
/// @{
|
||||
|
||||
/// @brief
|
||||
/// @details Performs embedding upon input.
|
||||
/// @n\b Example:
|
||||
/// @n input_size = { 8, 1, 1, 75 };
|
||||
/// @n weights_size = {15, 1, 62, 1 };
|
||||
/// @n output_size = { 8, 75, 15, 1 };
|
||||
/// @par Algorithm:
|
||||
/// @par Where:
|
||||
struct embed : public primitive_base<embed> {
|
||||
CLDNN_DECLARE_PRIMITIVE(embed)
|
||||
|
||||
/// @brief Constructs embed primitive.
|
||||
/// @param id This primitive id.
|
||||
/// @param input Input primitive id.
|
||||
/// @param weights Primitive id containing weights data.
|
||||
/// @param bias Primitive id containing bias data.
|
||||
embed(
|
||||
const primitive_id& id,
|
||||
const primitive_id& input,
|
||||
const primitive_id& weights,
|
||||
const primitive_id& bias)
|
||||
: primitive_base(id, {input}), weights(weights), bias(bias) {}
|
||||
|
||||
/// @brief Constructs embed primitive.
|
||||
/// @param id This primitive id.
|
||||
/// @param input Input primitive id.
|
||||
embed(
|
||||
const primitive_id& id,
|
||||
const primitive_id& input,
|
||||
const primitive_id& weights)
|
||||
: primitive_base(id, {input}), weights(weights), bias("") {}
|
||||
|
||||
/// @brief Primitive id containing weights data.
|
||||
primitive_id weights;
|
||||
/// @brief Primitive id containing bias data.
|
||||
primitive_id bias;
|
||||
|
||||
protected:
|
||||
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
|
||||
if (bias.empty())
|
||||
return {weights};
|
||||
else
|
||||
return {weights, bias};
|
||||
}
|
||||
};
|
||||
/// @}
|
||||
/// @}
|
||||
/// @}
|
||||
} // namespace cldnn
|
||||
#pragma once
|
@ -1,59 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#pragma once
|
||||
#include "primitive.hpp"
|
||||
#include <vector>
|
||||
|
||||
namespace cldnn {
|
||||
/// @addtogroup cpp_api C++ API
|
||||
/// @{
|
||||
/// @addtogroup cpp_topology Network Topology
|
||||
/// @{
|
||||
/// @addtogroup cpp_primitives Primitives
|
||||
/// @{
|
||||
|
||||
/// @brief Performs backward fully connected layer (inner product) for input.
|
||||
struct fully_connected_grad_input : public primitive_base<fully_connected_grad_input> {
|
||||
CLDNN_DECLARE_PRIMITIVE(fully_connected_grad_input)
|
||||
|
||||
/// @brief Constructs fully connected layer grad for input.
|
||||
/// @param id This primitive id.
|
||||
/// @param input_grad Input gradient primitive id.
|
||||
/// @param input Input primitive id.
|
||||
/// @param weights Primitive id containing weights data.
|
||||
fully_connected_grad_input(
|
||||
const primitive_id& id,
|
||||
const primitive_id& input_grad,
|
||||
const primitive_id& input,
|
||||
const primitive_id& weights,
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, {input_grad, input}, output_padding), weights(weights) {
|
||||
}
|
||||
|
||||
/// @brief Primitive id containing weights data.
|
||||
primitive_id weights;
|
||||
|
||||
protected:
|
||||
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
|
||||
return {weights};
|
||||
}
|
||||
};
|
||||
/// @}
|
||||
/// @}
|
||||
/// @}
|
||||
} // namespace cldnn
|
@ -1,115 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#pragma once
|
||||
#include "primitive.hpp"
|
||||
#include <vector>
|
||||
|
||||
namespace cldnn {
|
||||
/// @addtogroup cpp_api C++ API
|
||||
/// @{
|
||||
/// @addtogroup cpp_topology Network Topology
|
||||
/// @{
|
||||
/// @addtogroup cpp_primitives Primitives
|
||||
/// @{
|
||||
|
||||
/// @brief Performs backward fully connected layer (inner product) for weights and biases.
|
||||
struct fully_connected_grad_weights
|
||||
: public primitive_base<fully_connected_grad_weights> {
|
||||
CLDNN_DECLARE_PRIMITIVE(fully_connected_grad_weights)
|
||||
|
||||
/// @brief Constructs fully connected layer for weights and biases.
|
||||
/// @param id This primitive id.
|
||||
/// @param input Input gradient primitive id.
|
||||
/// @param input Input primitive id.
|
||||
/// @param weights Primitive id containing weights data.
|
||||
/// @param bias Primitive id containing bias data. Provide empty string if using Relu without bias.
|
||||
/// @param fc_grad Id of primitive which uses weights and biases updated in this primitive.
|
||||
/// This is for correct order of calculating. Leave empty if primitive is last in backward pass.
|
||||
fully_connected_grad_weights(const primitive_id& id,
|
||||
const primitive_id& input_grad,
|
||||
const primitive_id& input,
|
||||
const primitive_id& weights,
|
||||
const primitive_id& bias = "",
|
||||
const primitive_id& fc_grad = "",
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, {input_grad, input}, output_padding),
|
||||
weights(weights),
|
||||
bias(bias),
|
||||
fc_grad(fc_grad),
|
||||
prev_weights_grad(""),
|
||||
prev_bias_grad("") {}
|
||||
|
||||
/// @brief Constructs fully connected layer for weights and biases with momentum optimizer.
|
||||
/// @param id This primitive id.
|
||||
/// @param input Input gradient primitive id.
|
||||
/// @param input Input primitive id.
|
||||
/// @param weights Primitive id containing weights data.
|
||||
/// @param bias Primitive id containing bias data. Provide empty string if using Relu without bias.
|
||||
/// @param prev_weights_grad Id of primitive which contains weights gradient data calculated in previous iteration. Used in momentum optimizer.
|
||||
/// @param prev_bias_grad Id of primitive which contains bias gradient data calculated in previous iteration. Used in momentum optimizer.
|
||||
/// @param fc_grad Id of primitive which uses weights and biases updated in this primitive. This is for correct order of calculating.
|
||||
fully_connected_grad_weights(const primitive_id& id,
|
||||
const primitive_id& input_grad,
|
||||
const primitive_id& input,
|
||||
const primitive_id& weights,
|
||||
const primitive_id& bias,
|
||||
const primitive_id& prev_weights_grad,
|
||||
const primitive_id& prev_bias_grad,
|
||||
const primitive_id& fc_grad = "",
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, {input_grad, input}, output_padding),
|
||||
weights(weights),
|
||||
bias(bias),
|
||||
fc_grad(fc_grad),
|
||||
prev_weights_grad(prev_weights_grad),
|
||||
prev_bias_grad(prev_bias_grad) {}
|
||||
|
||||
/// @brief Primitive id containing weights data.
|
||||
primitive_id weights;
|
||||
/// @brief Primitive id containing bias data.
|
||||
primitive_id bias;
|
||||
/// @brief Primitive id containing fully connected gradient data.
|
||||
primitive_id fc_grad;
|
||||
/// @brief Id of primitive containing weights gradient data calculated in previous iteration. It's memory size should be same as weights.
|
||||
primitive_id prev_weights_grad;
|
||||
/// @brief Id of primitive containing bias gradient data calculated in previous iteration. It's memory size should be same as biases.
|
||||
primitive_id prev_bias_grad;
|
||||
|
||||
protected:
|
||||
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
|
||||
std::vector<std::reference_wrapper<const primitive_id>> ret;
|
||||
ret.reserve(1 + !bias.empty() + !fc_grad.empty() + !prev_weights_grad.empty() + !prev_bias_grad.empty());
|
||||
|
||||
ret.push_back(weights);
|
||||
if (!bias.empty())
|
||||
ret.push_back(bias);
|
||||
|
||||
if (!prev_weights_grad.empty())
|
||||
ret.push_back(prev_weights_grad);
|
||||
if (!prev_bias_grad.empty())
|
||||
ret.push_back(prev_bias_grad);
|
||||
if (!fc_grad.empty())
|
||||
ret.push_back(fc_grad);
|
||||
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
/// @}
|
||||
/// @}
|
||||
/// @}
|
||||
} // namespace cldnn
|
@ -1,109 +0,0 @@
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#pragma once
|
||||
|
||||
#include "primitive.hpp"
|
||||
#include <vector>
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
/// @brief Axis along which the index_select primitive selects indices.
enum class index_select_axis_name {
    along_b = 0,  ///< Select along the b axis.
    along_f = 1,  ///< Select along the f axis.
    along_y = 2,  ///< Select along the y axis.
    along_x = 3   ///< Select along the x axis.
};
|
||||
|
||||
/// @brief Selects the indices that will be copied to the output.
|
||||
///
|
||||
/// @details Applies index selection along the specified dimension. The indices to be copied are specified
|
||||
/// by @c indices.
|
||||
/// @n
|
||||
/// @n Example:
|
||||
/// @n <tt>input_sizes = (1, 2, 4, 2)</tt>
|
||||
/// @n <tt>input_values = (a, b, c, d)</tt>
|
||||
/// @n <tt> (e, f, g, h)</tt>
|
||||
/// @n <tt>indices_sizes = (1, 1, 6, 1)</tt>
|
||||
/// @n <tt>indices_values = {0, 0, 1, 1, 3, 3}</tt>
|
||||
/// @n For axis: along_x:
|
||||
/// @n <tt>output_sizes = (1, 2, 6, 2)</tt>
|
||||
/// @n <tt>output_values = (a, a, b, b, d, d)</tt>
|
||||
/// @n <tt> (e, e, f, f, h, h)</tt>
|
||||
/// @n
|
||||
/// @n The resulting output will have sizes equal to input_size with changed concrete tensor size to inidices x size.
|
||||
/// @n
|
||||
/// @n@b Requirements:
|
||||
/// @n - @c input must be a valid primitive_id, which output's format is bfyx/yxfb;
|
||||
/// @n - @c indices must be a valid primitive_id, which output's layout is: (bfyx/yxfb, i32, {1, 1, indicies_size, 1})
|
||||
/// @n - @c axis - valid index_select_axis_name instance.
|
||||
/// @n Breaking any of this conditions will cause exeption throw.
|
||||
struct index_select : public primitive_base<index_select> {
|
||||
CLDNN_DECLARE_PRIMITIVE(index_select)
|
||||
|
||||
/// @brief Constructs index_select primitive / layer.
|
||||
///
|
||||
/// @param id An identifier of new primitive.
|
||||
/// @param input An identifier of primitive, which is an input for newly created
|
||||
/// index_select primitive.
|
||||
/// @param indicies An identifer of primitive, which have indices in memory distributed along x.
|
||||
/// @param axis Axis of index selecting.
|
||||
/// @param output_padding Optional padding for output from primitive.
|
||||
index_select(
|
||||
const primitive_id& id,
|
||||
const primitive_id& input,
|
||||
const primitive_id& indices,
|
||||
index_select_axis_name axis = index_select_axis_name::along_b,
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, {input, indices}, output_padding), axis({axis}), reverse(false) {}
|
||||
|
||||
/// @brief Constructs index_select primitive / layer.
|
||||
///
|
||||
/// @param id An identifier of new primitive.
|
||||
/// @param input An identifier of primitive, which is an input for newly created
|
||||
/// index_select primitive.
|
||||
/// @param axis Axis of index selecting.
|
||||
/// @param output_padding Optional padding for output from primitive.
|
||||
index_select(
|
||||
const primitive_id& id,
|
||||
const primitive_id& input,
|
||||
index_select_axis_name axis = index_select_axis_name::along_b,
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, {input}, output_padding), axis({axis}), reverse(true) {}
|
||||
|
||||
/// @brief Constructs index_select primitive / layer.
|
||||
///
|
||||
/// @param id An identifier of new primitive.
|
||||
/// @param input An identifier of primitive, which is an input for newly created
|
||||
/// index_select primitive.
|
||||
/// @param axis Vector of axes of index selecting.
|
||||
/// @param output_padding Optional padding for output from primitive.
|
||||
index_select(
|
||||
const primitive_id& id,
|
||||
const primitive_id& input,
|
||||
const std::vector<index_select_axis_name>& axis = {index_select_axis_name::along_b},
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, {input}, output_padding), axis(axis), reverse(true) {}
|
||||
|
||||
/// @brief A list of axes of index selecting
|
||||
std::vector<index_select_axis_name> axis;
|
||||
/// @brief Do index_select in reverse order on axis/axes.
|
||||
bool reverse;
|
||||
};
|
||||
/// @}
|
||||
/// @}
|
||||
/// @}
|
||||
} // namespace cldnn
|
@ -1,58 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#pragma once
|
||||
#include "primitive.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
/// @addtogroup cpp_api C++ API
|
||||
/// @{
|
||||
/// @addtogroup cpp_topology Network Topology
|
||||
/// @{
|
||||
/// @addtogroup cpp_primitives Primitives
|
||||
/// @{
|
||||
|
||||
/// @brief Returns values from data on which given indices are pointing at.
|
||||
struct lookup_table : public primitive_base<lookup_table> {
|
||||
CLDNN_DECLARE_PRIMITIVE(lookup_table)
|
||||
|
||||
/// @brief Enum type to specify axis to maximize/minimize along.
|
||||
enum axis_name { batch, feature, x, y, xyf };
|
||||
|
||||
/// @brief Constructs lookup_table primitive.
|
||||
/// @param id This primitive id.
|
||||
/// @param input_data Input data primitive id.
|
||||
/// @param input_indices Input indices primitive id.
|
||||
/// @param axis Axis to return values from.
|
||||
lookup_table(const primitive_id& id,
|
||||
const primitive_id& input_data,
|
||||
const primitive_id& input_indices,
|
||||
axis_name axis = axis_name::xyf,
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, {input_data, input_indices}, output_padding),
|
||||
axis(axis),
|
||||
with_axis(axis == axis_name::xyf ? false : true) {}
|
||||
|
||||
/// @brief Axis to return values from. If not set, returns data which index is pointing at in the flattened x, y, f dimensions for each batch.
|
||||
axis_name axis;
|
||||
/// @brief Indicates that the primitive has user defined axis to return values from.
|
||||
bool with_axis;
|
||||
};
|
||||
/// @}
|
||||
/// @}
|
||||
/// @}
|
||||
} // namespace cldnn
|
@ -113,12 +113,6 @@ struct network {
|
||||
/// @brief Provides user-supplied @ref memory for output primitives defined by user in source @ref topology.
|
||||
void set_output_memory(const primitive_id& id, const memory& mem) const;
|
||||
|
||||
/// @brief Sets learning rate for training primitives.
|
||||
void set_learning_rate(const float lr);
|
||||
|
||||
/// @brief Return learning rate.
|
||||
float get_learning_rate();
|
||||
|
||||
/// @brief Return stream id.
|
||||
uint16_t get_stream_id();
|
||||
|
||||
|
@ -1,51 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#pragma once
|
||||
#include "primitive.hpp"
|
||||
#include <vector>
|
||||
|
||||
namespace cldnn {
|
||||
/// @addtogroup cpp_api C++ API
|
||||
/// @{
|
||||
/// @addtogroup cpp_topology Network Topology
|
||||
/// @{
|
||||
/// @addtogroup cpp_primitives Primitives
|
||||
/// @{
|
||||
|
||||
/// @brief Performs scale primitive backward for input.
|
||||
struct scale_grad_input : public primitive_base<scale_grad_input> {
|
||||
CLDNN_DECLARE_PRIMITIVE(scale_grad_input)
|
||||
|
||||
/// @brief Constructs scale_grad_input.
|
||||
/// @param id This primitive id.
|
||||
/// @param input Input primitive id.
|
||||
/// @param scale_input Scale input primitive id with values needed for product computation.
|
||||
scale_grad_input(const primitive_id& id,
|
||||
const primitive_id& input,
|
||||
const primitive_id& scale_input, // should be bfyx or yxfb, where each dimension can be 1, if all
|
||||
// dimensions are 1 then this is scalar
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, {input, scale_input}, output_padding) {}
|
||||
|
||||
protected:
|
||||
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override { return {}; }
|
||||
};
|
||||
/// @}
|
||||
/// @}
|
||||
/// @}
|
||||
} // namespace cldnn
|
@ -1,131 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2016 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#pragma once
|
||||
#include "primitive.hpp"
|
||||
#include <vector>
|
||||
|
||||
namespace cldnn {
|
||||
/// @addtogroup cpp_api C++ API
|
||||
/// @{
|
||||
/// @addtogroup cpp_topology Network Topology
|
||||
/// @{
|
||||
/// @addtogroup cpp_primitives Primitives
|
||||
/// @{
|
||||
|
||||
/// @brief Performs scale layer backward for scale_input and biases.
|
||||
struct scale_grad_weights : public primitive_base<scale_grad_weights> {
|
||||
CLDNN_DECLARE_PRIMITIVE(scale_grad_weights)
|
||||
|
||||
/// @brief Constructs scale_grad_weights primitive without bias.
|
||||
/// @param id This primitive id.
|
||||
/// @param input Input primitive id. Same as input for scale forward.
|
||||
/// @param input_grad Input gradient primitive id.
|
||||
/// @param scale_input Scale input primitive id.
|
||||
/// @param scale_grad Id of primitive which uses weights and biases updated in this primitive. This is for correct order of calculating.
|
||||
scale_grad_weights(const primitive_id& id,
|
||||
const primitive_id& input,
|
||||
const primitive_id& input_grad,
|
||||
const primitive_id& scale_input, // should be one number per feature
|
||||
const primitive_id& scale_grad = "", // leave empty if this is last primitive in backward pass
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, {input, input_grad}, output_padding),
|
||||
scale_input(scale_input),
|
||||
bias(""),
|
||||
prev_scale_grad(""),
|
||||
prev_bias_grad(""),
|
||||
scale_grad(scale_grad) {}
|
||||
|
||||
/// @brief Constructs scale_grad_weights primitive with optional adding bias.
|
||||
/// @param id This primitive id.
|
||||
/// @param input Input primitive id. Same as input for scale forward.
|
||||
/// @param input_grad Input gradient primitive id.
|
||||
/// @param scale_input Scale input primitive id.
|
||||
/// @param bias Primitive id containing bias data.
|
||||
/// @param scale_grad Id of primitive which uses weights and biases updated in this primitive. This is for correct order of calculating.
|
||||
scale_grad_weights(const primitive_id& id,
|
||||
const primitive_id& input,
|
||||
const primitive_id& input_grad,
|
||||
const primitive_id& scale_input, // should be one number per feature
|
||||
const primitive_id& bias, // should be same size as scale_input
|
||||
const primitive_id& scale_grad = "", // leave empty if this is last primitive in backward pass
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, {input, input_grad}, output_padding),
|
||||
scale_input(scale_input),
|
||||
bias(bias),
|
||||
prev_scale_grad(""),
|
||||
prev_bias_grad(""),
|
||||
scale_grad(scale_grad) {}
|
||||
|
||||
/// @brief Constructs scale_grad_weights primitive with optional bias and momentum optimizer.
|
||||
/// @param id This primitive id.
|
||||
/// @param input Input primitive id. Same as input for scale forward.
|
||||
/// @param input_grad Input gradient primitive id.
|
||||
/// @param scale_input Scale input primitive id.
|
||||
/// @param bias Primitive id containing bias data.
|
||||
/// @param prev_scale_grad Id of primitive which contains scale gradient data calculated in previous iteration. Used in momentum optimizer.
|
||||
/// @param prev_bias_grad Id of primitive which contains bias gradient data calculated in previous iteration. Used in momentum optimizer.
|
||||
/// @param scale_grad Id of primitive which uses weights and biases updated in this primitive. This is for correct order of calculating.
|
||||
scale_grad_weights(const primitive_id& id,
|
||||
const primitive_id& input,
|
||||
const primitive_id& input_grad,
|
||||
const primitive_id& scale_input, // should be one number per feature
|
||||
const primitive_id& bias, // should be same size as scale_input
|
||||
const primitive_id& prev_scale_grad,
|
||||
const primitive_id& prev_bias_grad, // leave empty if bias not specified
|
||||
const primitive_id& scale_grad = "", // leave empty if this is last primitive in backward pass
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, {input, input_grad}, output_padding),
|
||||
scale_input(scale_input),
|
||||
bias(bias),
|
||||
prev_scale_grad(prev_scale_grad),
|
||||
prev_bias_grad(prev_bias_grad),
|
||||
scale_grad(scale_grad) {}
|
||||
|
||||
/// @brief Scale input primitive id.
|
||||
primitive_id scale_input;
|
||||
/// @brief Primitive id containing bias data.
|
||||
primitive_id bias;
|
||||
/// @brief Primitive id containing scale gradient data calculated in previous iteration.
|
||||
primitive_id prev_scale_grad;
|
||||
/// @brief Primitive id containing bias gradient data calculated in previous iteration.
|
||||
primitive_id prev_bias_grad;
|
||||
/// @brief Primitive id which uses weights and biases updated in this primitive.
|
||||
primitive_id scale_grad;
|
||||
|
||||
protected:
|
||||
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
|
||||
std::vector<std::reference_wrapper<const primitive_id>> ret;
|
||||
ret.reserve(1 + !bias.empty() + !prev_scale_grad.empty() + !prev_bias_grad.empty());
|
||||
|
||||
ret.push_back(scale_input);
|
||||
if (!bias.empty())
|
||||
ret.push_back(bias);
|
||||
if (!prev_scale_grad.empty())
|
||||
ret.push_back(prev_scale_grad);
|
||||
if (!prev_bias_grad.empty())
|
||||
ret.push_back(prev_bias_grad);
|
||||
if (!scale_grad.empty())
|
||||
ret.push_back(scale_grad);
|
||||
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
/// @}
|
||||
/// @}
|
||||
/// @}
|
||||
} // namespace cldnn
|
@ -1,47 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#pragma once
|
||||
#include "primitive.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
/// @addtogroup cpp_api C++ API
|
||||
/// @{
|
||||
/// @addtogroup cpp_topology Network Topology
|
||||
/// @{
|
||||
/// @addtogroup cpp_primitives Primitives
|
||||
/// @{
|
||||
|
||||
/// @brief Backward pass for Softmax log loss.
|
||||
/// @details The output values are the same as input_prob, except for the correct one based on the label which is subtracted by 1.
|
||||
struct softmax_loss_grad : public primitive_base<softmax_loss_grad> {
|
||||
CLDNN_DECLARE_PRIMITIVE(softmax_loss_grad)
|
||||
|
||||
/// @brief Constructs softmax_loss_grad primitive.
|
||||
/// @param id This primitive id.
|
||||
/// @param input_prob Input primitive id.
|
||||
/// @param labels Labels primitive id.
|
||||
softmax_loss_grad(const primitive_id& id,
|
||||
const primitive_id& input_prob,
|
||||
const primitive_id& labels,
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, {input_prob, labels}, output_padding) {}
|
||||
};
|
||||
/// @}
|
||||
/// @}
|
||||
/// @}
|
||||
} // namespace cldnn
|
@ -1,115 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#pragma once
|
||||
#include "api/primitive.hpp"
|
||||
#include <vector>
|
||||
|
||||
namespace cldnn {
|
||||
/// @addtogroup cpp_api C++ API
|
||||
/// @{
|
||||
/// @addtogroup cpp_topology Network Topology
|
||||
/// @{
|
||||
/// @addtogroup cpp_primitives Primitives
|
||||
/// @{
|
||||
|
||||
/// @brief Primitives that fuses convolution, batch norm, scale and optionally Relu.
|
||||
struct fused_conv_bn_scale : public primitive_base<fused_conv_bn_scale> {
|
||||
CLDNN_DECLARE_PRIMITIVE(fused_conv_bn_scale)
|
||||
|
||||
/// @brief Constructs convolution primitive fused with batch norm and scale.
|
||||
/// @param id This primitive id.
|
||||
/// @param input Input primitive id.
|
||||
/// @param weights List of primitive ids containing weights data.
|
||||
/// @param bias List of primitive ids containing bias data.
|
||||
/// @param epsilon Small number to protect from 0 dividing.
|
||||
/// @param scale_input Scale input primitive id with values needed for product computation. Used in fused scale part.
|
||||
/// @param scale_bias Primitive id containing bias data for fused scale part.
|
||||
/// @param input_offset Defines a shift, relative to (0,0) position of the input buffer,
|
||||
/// where (0,0) point of the convolution window should start calculations.
|
||||
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
|
||||
/// @param inv_variance Primitive id containing inverted variance calculated in this primitive. Used in fused batch norm part.
|
||||
/// @param with_activation Enable Relu activation.
|
||||
/// @param activation_slp Relu activation slope.
|
||||
fused_conv_bn_scale(const primitive_id& id,
|
||||
const primitive_id& input,
|
||||
const std::vector<primitive_id>& weights,
|
||||
const std::vector<primitive_id>& bias,
|
||||
float epsilon,
|
||||
const primitive_id& scale_input,
|
||||
const primitive_id& scale_bias = "",
|
||||
tensor stride = {1, 1, 1, 1},
|
||||
tensor dilation = {1, 1, 1, 1},
|
||||
tensor input_offset = {0, 0, 0, 0},
|
||||
const primitive_id& inv_variance = "",
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, {input, scale_input}, output_padding),
|
||||
input_offset(input_offset),
|
||||
stride(stride),
|
||||
dilation(dilation),
|
||||
with_output_size(false),
|
||||
scale_bias(scale_bias),
|
||||
inv_variance(inv_variance),
|
||||
epsilon(epsilon),
|
||||
weights(weights),
|
||||
bias(bias) {
|
||||
if ((bias.size() != 0) && (weights.size() != bias.size()))
|
||||
throw std::runtime_error("convolution's weights/bias count does not match");
|
||||
}
|
||||
|
||||
/// @brief Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution window should start calculations.
|
||||
tensor input_offset;
|
||||
/// @brief Defines shift in input buffer between adjacent calculations of output values.
|
||||
tensor stride;
|
||||
/// @brief Defines gaps in the input - dilation rate k=1 is normal convolution, k=2 means skipping one pixel per input, k=4 means skipping 3 pixels.
|
||||
/// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1.
|
||||
/// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4].
|
||||
tensor dilation;
|
||||
/// @brief Indicates that the primitive has user-defined output size (non-zero value).
|
||||
bool with_output_size;
|
||||
/// @brief User-defined output data size of the primitive (w/o padding).
|
||||
tensor output_size;
|
||||
/// @brief Primitive id containing scale bias data for fused convolution.
|
||||
primitive_id scale_bias;
|
||||
/// @brief Primitive id containing inverted variance used in future gradient computing for fused convolution.
|
||||
primitive_id inv_variance;
|
||||
/// @brief Epsilon for fused convolution.
|
||||
float epsilon;
|
||||
/// @brief On how many cards split the computation to.
|
||||
int32_t split() const { return static_cast<int32_t>(weights.size()); }
|
||||
/// @brief List of primitive ids containing weights data.
|
||||
const primitive_id_arr weights;
|
||||
/// @brief List of primitive ids containing bias data.
|
||||
const primitive_id_arr bias;
|
||||
|
||||
protected:
|
||||
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
|
||||
std::vector<std::reference_wrapper<const primitive_id>> ret;
|
||||
ret.reserve(weights.size() + bias.size() + !scale_bias.empty() + !inv_variance.empty());
|
||||
for (auto& w : weights) ret.push_back(std::ref(w));
|
||||
for (auto& b : bias) ret.push_back(std::ref(b));
|
||||
if (!scale_bias.empty())
|
||||
ret.push_back(scale_bias);
|
||||
if (!inv_variance.empty())
|
||||
ret.push_back(inv_variance);
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
/// @}
|
||||
/// @}
|
||||
/// @}
|
||||
} // namespace cldnn
|
@ -37,9 +37,6 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise> {
|
||||
/// @param input Input primitive id.
|
||||
/// @param weights List of primitive ids containing weights data.
|
||||
/// @param bias List of primitive ids containing bias data.
|
||||
/// @param w_quantization_factor List of primitive ids containing weights quantization factors per output feature map.
|
||||
/// @param output_calibration_factors List of primitive ids output containing calibration factors per output feature map.
|
||||
/// @param i_quantization_factor Input quantization factor
|
||||
/// @param input_offset Defines a shift, relative to (0,0) position of the input buffer,
|
||||
/// where (0,0) point of the convolution window should start calculations.
|
||||
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
|
||||
@ -57,11 +54,6 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise> {
|
||||
eltwise_mode mode,
|
||||
const std::vector<primitive_id>& weights,
|
||||
const std::vector<primitive_id>& bias,
|
||||
const std::vector<primitive_id>& conv_w_quantization_factor,
|
||||
const std::vector<primitive_id>& conv_output_calibration_factors,
|
||||
const float conv_i_quantization_factor,
|
||||
const float non_conv_scale,
|
||||
const primitive_id& eltw_output_calibration_factors,
|
||||
const std::vector<tensor>& eltw_stride,
|
||||
tensor stride = {1, 1, 1, 1},
|
||||
tensor input_offset = {0, 0, 0, 0},
|
||||
@ -74,18 +66,10 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise> {
|
||||
optional_data_type output_data_type = {})
|
||||
: primitive_base(id, {input, input2}, output_padding, output_data_type),
|
||||
conv((primitive_id_arr)weights,
|
||||
(primitive_id_arr)bias,
|
||||
(primitive_id_arr)conv_w_quantization_factor,
|
||||
(primitive_id_arr)conv_output_calibration_factors),
|
||||
eltw(eltw_output_calibration_factors),
|
||||
non_conv_scale(non_conv_scale),
|
||||
(primitive_id_arr)bias),
|
||||
eltw(),
|
||||
conv_weights(weights),
|
||||
conv_bias(bias),
|
||||
conv_weights_quantization_factors(conv_w_quantization_factor),
|
||||
conv_output_calibration_factors(conv_output_calibration_factors) {
|
||||
conv.input_quantization_factor = conv_i_quantization_factor;
|
||||
conv.output_quantization_factor = 1.0f;
|
||||
|
||||
conv_bias(bias) {
|
||||
conv.input_offset = input_offset;
|
||||
conv.stride = stride;
|
||||
conv.dilation = dilation;
|
||||
@ -100,10 +84,6 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise> {
|
||||
|
||||
if ((bias.size() != 0) && (weights.size() != bias.size()))
|
||||
throw std::runtime_error("convolution's weights/bias count does not match");
|
||||
if (conv.output_calibration_factors.size()) {
|
||||
if ((weights.size() != 0) && (weights.size() != conv.weights_quantization_factors.size()))
|
||||
throw std::runtime_error("convolution's weights count does not match quantization factors count");
|
||||
}
|
||||
}
|
||||
|
||||
struct conv_data {
|
||||
@ -111,14 +91,6 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise> {
|
||||
const primitive_id_arr weights;
|
||||
/// @brief List of primitive ids containing bias data.
|
||||
const primitive_id_arr bias;
|
||||
/// @brief List of primitive ids containing weights quantization factors per output feature map.
|
||||
const primitive_id_arr weights_quantization_factors;
|
||||
/// @brief List of primitive ids containing output quantization factors per output feature map for convolution.
|
||||
const primitive_id_arr output_calibration_factors;
|
||||
/// @brief Input quantization factor for convolution
|
||||
float input_quantization_factor;
|
||||
/// @brief Output quantization factor for convolution
|
||||
float output_quantization_factor;
|
||||
/// @brief Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution window should start calculations.
|
||||
tensor input_offset;
|
||||
/// @brief Defines shift in input buffer between adjacent calculations of output values.
|
||||
@ -137,20 +109,12 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise> {
|
||||
tensor output_size;
|
||||
|
||||
conv_data(const primitive_id_arr& weights,
|
||||
const primitive_id_arr& bias,
|
||||
const primitive_id_arr& weights_quantization_factors,
|
||||
const primitive_id_arr& output_calibration_factors)
|
||||
const primitive_id_arr& bias)
|
||||
: weights(weights),
|
||||
bias(bias),
|
||||
weights_quantization_factors(weights_quantization_factors),
|
||||
output_calibration_factors(output_calibration_factors) {}
|
||||
bias(bias) {}
|
||||
} conv;
|
||||
|
||||
struct eltw_data {
|
||||
/// @brief Primitive id containing output quantization factors per output feature map.
|
||||
primitive_id output_calibration_factors;
|
||||
/// @brief Output quantization factor for eltwise
|
||||
float output_quantization_factor;
|
||||
/// @param mode Eltwise mode.
|
||||
eltwise_mode mode;
|
||||
/// @brief Enable Relu activation.
|
||||
@ -159,22 +123,11 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise> {
|
||||
float activation_negative_slope;
|
||||
/// @brief Defines shift in input buffers between adjacent calculations of output values.
|
||||
std::vector<tensor> stride;
|
||||
explicit eltw_data(const primitive_id& output_calibration_factors)
|
||||
: output_calibration_factors(output_calibration_factors) {}
|
||||
} eltw;
|
||||
|
||||
/// @brief On how many cards split the computation to.
|
||||
int32_t split() const { return static_cast<int32_t>(conv.weights.size()); }
|
||||
|
||||
// FIXME: In fact, that should be needed for any EltWise primitive, not
|
||||
// only the fused one. What's more important, these scales should be
|
||||
// separate for different inputs and probably per-channel, not per
|
||||
// primitive.
|
||||
//
|
||||
// I'm only needing a scalar for my particular task, so let's hack like
|
||||
// this in the meantime. The final design is still to be investigated.
|
||||
float non_conv_scale = 1.0f;
|
||||
|
||||
/// @brief Is optimization that output contains data from second input ON ?
|
||||
bool second_input_in_output = false;
|
||||
bool depth_to_space_already_fused = false;
|
||||
@ -182,21 +135,13 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise> {
|
||||
protected:
|
||||
const primitive_id_arr conv_weights;
|
||||
const primitive_id_arr conv_bias;
|
||||
const primitive_id_arr conv_weights_quantization_factors;
|
||||
const primitive_id_arr conv_output_calibration_factors;
|
||||
|
||||
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
|
||||
std::vector<std::reference_wrapper<const primitive_id>> ret;
|
||||
ret.reserve(conv.weights.size() + conv.bias.size() + conv.weights_quantization_factors.size() +
|
||||
conv.output_calibration_factors.size() + (eltw.output_calibration_factors.empty() ? 0 : 1));
|
||||
ret.reserve(conv.weights.size() + conv.bias.size());
|
||||
|
||||
for (auto& w : conv.weights) ret.push_back(std::ref(w));
|
||||
for (auto& b : conv.bias) ret.push_back(std::ref(b));
|
||||
for (auto& q : conv.weights_quantization_factors) ret.push_back(std::ref(q));
|
||||
for (auto& q : conv.output_calibration_factors) ret.push_back(std::ref(q));
|
||||
|
||||
if (!eltw.output_calibration_factors.empty())
|
||||
ret.push_back(eltw.output_calibration_factors);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -67,7 +67,6 @@ inline uint8_t GetActivationAdditionalParamsNumber(ActivationFunction func) {
|
||||
break;
|
||||
case ActivationFunction::RELU_NEGATIVE_SLOPE:
|
||||
case ActivationFunction::ELU:
|
||||
case ActivationFunction::RELU_NEGATIVE_SLOPE_GRAD:
|
||||
paramsNum = 1;
|
||||
break;
|
||||
default:
|
||||
|
@ -25,8 +25,6 @@ enum class KernelType {
|
||||
UNKNOWN,
|
||||
ARG_MAX_MIN,
|
||||
AVERAGE_UNPOOLING,
|
||||
BATCH_NORM_GRAD,
|
||||
LOOKUP_TABLE,
|
||||
CONVOLUTION,
|
||||
DECONVOLUTION,
|
||||
LRN,
|
||||
@ -38,9 +36,7 @@ enum class KernelType {
|
||||
SOFT_MAX,
|
||||
ELTWISE,
|
||||
SCALE,
|
||||
FUSED_CONV_BN_SCALE,
|
||||
FUSED_CONV_ELTWISE,
|
||||
TABLE_LOOKUP,
|
||||
REORDER,
|
||||
RESHAPE,
|
||||
PERMUTE,
|
||||
@ -49,21 +45,14 @@ enum class KernelType {
|
||||
REGION_YOLO,
|
||||
REORG_YOLO,
|
||||
MAX_UNPOOLING,
|
||||
CONVOLUTION_GRAD_WEIGHTS,
|
||||
SCALE_GRAD_WEIGHTS,
|
||||
MVN,
|
||||
FULLY_CONNECTED_GRAD_INPUT,
|
||||
FULLY_CONNECTED_GRAD_WEIGHTS,
|
||||
LSTM_GEMM,
|
||||
LSTM_ELT,
|
||||
EMBED,
|
||||
SOFT_MAX_LOSS_GRAD,
|
||||
BORDER,
|
||||
TILE,
|
||||
SELECT,
|
||||
BROADCAST,
|
||||
GEMM,
|
||||
INDEX_SELECT,
|
||||
PYRAMID_ROI_ALIGN,
|
||||
CONTRACT,
|
||||
ONE_HOT,
|
||||
@ -133,8 +122,6 @@ enum class ActivationFunction {
|
||||
SQRT,
|
||||
LINEAR,
|
||||
ELU,
|
||||
RELU_GRAD,
|
||||
RELU_NEGATIVE_SLOPE_GRAD,
|
||||
SIN,
|
||||
ASIN,
|
||||
SINH,
|
||||
@ -155,7 +142,6 @@ enum class ActivationFunction {
|
||||
NEGATIVE,
|
||||
NOT,
|
||||
POW,
|
||||
NONE_GRAD,
|
||||
ERF,
|
||||
HARD_SIGMOID,
|
||||
RECIPROCAL,
|
||||
|
@ -103,9 +103,6 @@ KernelsData ActivationKernelBase::GetCommonKernelsData(const Params& params, con
|
||||
FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point,
|
||||
DEFAULT, false, false, 1, GetFusedPrimitiveInputsCount(params));
|
||||
|
||||
if (newParams.gradient)
|
||||
kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});
|
||||
|
||||
if (!newParams.inputActivationParams.empty()) {
|
||||
kernel.arguments.push_back({ArgumentDescriptor::Types::SLOPE, 0});
|
||||
}
|
||||
|
@ -34,7 +34,6 @@ ParamsKey ActivationKernelOpt::GetSupportedKey() const {
|
||||
k.EnableAllOutputLayout();
|
||||
k.EnableTensorOffset();
|
||||
k.EnableBatching();
|
||||
k.EnableGradient();
|
||||
return k;
|
||||
}
|
||||
|
||||
|
@ -38,7 +38,6 @@ ParamsKey ActivationKernelRef::GetSupportedKey() const {
|
||||
k.EnableTensorOffset();
|
||||
k.EnableTensorPitches();
|
||||
k.EnableBatching();
|
||||
k.EnableGradient();
|
||||
return k;
|
||||
}
|
||||
|
||||
|
@ -1,88 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "batch_norm_kernel_base.h"
|
||||
#include <algorithm>
|
||||
|
||||
namespace kernel_selector {
|
||||
bool BatchNormKernelBase::Validate(const Params& p, const optional_params& o) const {
|
||||
if (p.GetType() != KernelType::BATCH_NORM_GRAD || o.GetType() != KernelType::BATCH_NORM_GRAD) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Builds the JIT constants for a batch-norm kernel: the base-params constants,
// EPSILON, and one flag constant (value 1) for each enabled option in
// params.batchNormParams.
JitConstants BatchNormKernelBase::GetJitConstants(const batch_norm_params& params) const {
    JitConstants constants = MakeBaseParamsJitConstants(params);
    const auto& bn = params.batchNormParams;

    constants.AddConstant(MakeJitConstant("EPSILON", bn.epsilon));

    if (bn.with_inv_var) {
        constants.AddConstant(MakeJitConstant("FORWARD", 1));
    }
    if (bn.with_scale_shift) {
        constants.AddConstant(MakeJitConstant("SCALE_SHIFT", 1));
    }
    if (bn.with_mean_var_out) {
        constants.AddConstant(MakeJitConstant("MEAN_VAR_OUT", 1));
    }

    return constants;
}
|
||||
|
||||
// Computes the default dispatch sizes: global size is (batch, feature, 1) of the
// first input; the local size along dimension 0 is the largest value not above
// 256 that evenly divides the global size, and 1 along the other dimensions.
BatchNormKernelBase::DispatchData BatchNormKernelBase::SetDefault(const batch_norm_params& params) const {
    const auto& input = params.inputs[0];

    DispatchData dispatch;
    dispatch.fp16UnitUsed = input.GetDType() == Datatype::F16;

    dispatch.gws0 = input.Batch().v;
    dispatch.gws1 = input.Feature().v;
    dispatch.gws2 = 1;

    // Clamp the starting candidate to [1, 256], then walk down to a divisor of gws0.
    const size_t lowest = 1;
    const size_t highest = 256;
    dispatch.lws0 = std::min(std::max(dispatch.gws0, lowest), highest);
    for (; dispatch.gws0 % dispatch.lws0 != 0; --dispatch.lws0) {
    }
    dispatch.lws1 = 1;
    dispatch.lws2 = 1;

    return dispatch;
}
|
||||
|
||||
// Shared implementation of GetKernelsData for batch-norm kernels.
// Returns an empty list when Validate() rejects the request; otherwise returns a
// single KernelData whose first kernel is filled from the default dispatch data
// and the generated JIT, with estimatedTime set to the supplied value.
KernelsData BatchNormKernelBase::GetCommonKernelsData(const Params& params,
                                                      const optional_params& options,
                                                      float estimatedTime) const {
    if (!Validate(params, options)) {
        return {};
    }

    const batch_norm_params& bnParams = static_cast<const batch_norm_params&>(params);
    const auto& flags = bnParams.batchNormParams;

    DispatchData dispatch = SetDefault(bnParams);
    KernelData kernelData = KernelData::Default<batch_norm_params>(params);

    auto jitConstants = GetJitConstants(bnParams);
    auto entryPoint = GetEntryPoint(kernelName, bnParams.layerID, options);
    auto jitCode = CreateJit(kernelName, jitConstants, entryPoint);

    // One data input, plus one for the inverse variance and two each for
    // scale/shift and mean/variance outputs when those options are enabled.
    int inputCount = 1;
    inputCount += flags.with_inv_var ? 1 : 0;
    inputCount += flags.with_scale_shift ? 2 : 0;
    inputCount += flags.with_mean_var_out ? 2 : 0;

    auto& firstKernel = kernelData.kernels[0];
    FillCLKernelData(firstKernel, dispatch, params.engineInfo, kernelName, jitCode, entryPoint, "", false, false,
                     inputCount);

    kernelData.estimatedTime = estimatedTime;

    return {kernelData};
}
|
||||
} // namespace kernel_selector
|
@ -1,66 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// batch_norm_params
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
struct batch_norm_params : public base_params {
|
||||
batch_norm_params() : base_params(KernelType::BATCH_NORM_GRAD) {}
|
||||
|
||||
struct DedicatedParams {
|
||||
float epsilon;
|
||||
bool with_inv_var;
|
||||
bool with_scale_shift;
|
||||
bool with_mean_var_out = false;
|
||||
};
|
||||
|
||||
DedicatedParams batchNormParams;
|
||||
|
||||
virtual ParamsKey GetParamsKey() const {
|
||||
return base_params::GetParamsKey();
|
||||
}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// batch_norm_optional_params
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
struct batch_norm_optional_params : optional_params {
|
||||
batch_norm_optional_params() : optional_params(KernelType::BATCH_NORM_GRAD) {}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// BatchNormKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class BatchNormKernelBase : public common_kernel_base {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
virtual ~BatchNormKernelBase() {}
|
||||
|
||||
using DispatchData = CommonDispatchData;
|
||||
|
||||
protected:
|
||||
bool Validate(const Params& params, const optional_params& options) const override;
|
||||
KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimatedTime) const;
|
||||
virtual JitConstants GetJitConstants(const batch_norm_params& params) const;
|
||||
virtual DispatchData SetDefault(const batch_norm_params& params) const;
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -1,41 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "batch_norm_kernel_ref.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
// Describes what the reference batch-norm kernel supports: F16/F32/INT8 data on
// both input and output, bfyx/byxf/yxfb layouts on both sides, and batching.
ParamsKey BatchNormKernelRef::GetSupportedKey() const {
    const Datatype inputTypes[] = {Datatype::F16, Datatype::F32, Datatype::INT8};
    const Datatype outputTypes[] = {Datatype::F32, Datatype::F16, Datatype::INT8};
    const DataLayout layouts[] = {DataLayout::bfyx, DataLayout::byxf, DataLayout::yxfb};

    ParamsKey key;
    for (const auto type : inputTypes) {
        key.EnableInputDataType(type);
    }
    for (const auto type : outputTypes) {
        key.EnableOutputDataType(type);
    }
    for (const auto layout : layouts) {
        key.EnableInputLayout(layout);
    }
    for (const auto layout : layouts) {
        key.EnableOutputLayout(layout);
    }
    key.EnableBatching();
    return key;
}
|
||||
|
||||
// Delegates to the shared builder, passing FORCE_PRIORITY_9 as the estimated time.
KernelsData BatchNormKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
    KernelsData kernels = GetCommonKernelsData(params, options, FORCE_PRIORITY_9);
    return kernels;
}
|
||||
} // namespace kernel_selector
|
@ -1,30 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "batch_norm_kernel_base.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
class BatchNormKernelRef : public BatchNormKernelBase {
|
||||
public:
|
||||
BatchNormKernelRef() : BatchNormKernelBase("batch_norm_gpu_ref") {}
|
||||
virtual ~BatchNormKernelRef() {}
|
||||
|
||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -1,29 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "batch_norm_kernel_selector.h"
|
||||
#include "batch_norm_kernel_ref.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
// Registers the only available implementation: the reference kernel.
batch_norm_kernel_selector::batch_norm_kernel_selector() {
    this->Attach<BatchNormKernelRef>();
}
|
||||
|
||||
// Picks the best kernel via GetNaiveBestKernel for KernelType::BATCH_NORM_GRAD.
KernelsData batch_norm_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
    const KernelType selectedType = KernelType::BATCH_NORM_GRAD;
    return GetNaiveBestKernel(params, options, selectedType);
}
|
||||
} // namespace kernel_selector
|
@ -1,35 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "kernel_selector.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
class batch_norm_kernel_selector : public kernel_selector_base {
|
||||
public:
|
||||
static batch_norm_kernel_selector& Instance() {
|
||||
static batch_norm_kernel_selector instance_;
|
||||
return instance_;
|
||||
}
|
||||
|
||||
batch_norm_kernel_selector();
|
||||
|
||||
virtual ~batch_norm_kernel_selector() {}
|
||||
|
||||
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -1,72 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "batch_norm_grad_kernel_base.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
bool BatchNormGradKernelBase::Validate(const Params& p, const optional_params& o) const {
|
||||
if (p.GetType() != KernelType::BATCH_NORM_GRAD ||
|
||||
o.GetType() != KernelType::BATCH_NORM_GRAD) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// The gradient kernel adds nothing beyond the base-params JIT constants.
JitConstants BatchNormGradKernelBase::GetJitConstants(const batch_norm_grad_params& params) const {
    return MakeBaseParamsJitConstants(params);
}
|
||||
|
||||
// Computes the default dispatch sizes: global size is (batch, feature, 1) of the
// first input and the local size is (batch, 1, 1).
BatchNormGradKernelBase::DispatchData BatchNormGradKernelBase::SetDefault(const batch_norm_grad_params& params) const {
    const auto& input = params.inputs[0];

    DispatchData dispatch;
    dispatch.fp16UnitUsed = input.GetDType() == Datatype::F16;

    dispatch.gws0 = input.Batch().v;
    dispatch.gws1 = input.Feature().v;
    dispatch.gws2 = 1;

    // NOTE(review): lws0 equals the full batch size with no upper cap — confirm
    // the kernel relies on a single work-group spanning the batch.
    dispatch.lws0 = input.Batch().v;
    dispatch.lws1 = 1;
    dispatch.lws2 = 1;

    return dispatch;
}
|
||||
|
||||
// Shared implementation of GetKernelsData for batch-norm gradient kernels.
// Returns an empty list when Validate() rejects the request; otherwise returns a
// single KernelData whose first kernel is filled with three inputs and whose
// estimatedTime is set to the supplied value.
KernelsData BatchNormGradKernelBase::GetCommonKernelsData(const Params& params,
                                                          const optional_params& options,
                                                          float estimatedTime) const {
    if (!Validate(params, options)) {
        return {};
    }

    const batch_norm_grad_params& gradParams = static_cast<const batch_norm_grad_params&>(params);

    DispatchData dispatch = SetDefault(gradParams);
    KernelData kernelData = KernelData::Default<batch_norm_grad_params>(params);

    auto jitConstants = GetJitConstants(gradParams);
    auto entryPoint = GetEntryPoint(kernelName, gradParams.layerID, options);
    auto jitCode = CreateJit(kernelName, jitConstants, entryPoint);

    auto& firstKernel = kernelData.kernels[0];
    FillCLKernelData(firstKernel, dispatch, params.engineInfo, kernelName, jitCode, entryPoint, "", false, false, 3);

    kernelData.estimatedTime = estimatedTime;

    return {kernelData};
}
|
||||
} // namespace kernel_selector
|
@ -1,57 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// batch_norm_grad_params
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
struct batch_norm_grad_params : public base_params {
|
||||
batch_norm_grad_params() : base_params(KernelType::BATCH_NORM_GRAD) {}
|
||||
|
||||
virtual ParamsKey GetParamsKey() const {
|
||||
return base_params::GetParamsKey();
|
||||
}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// batch_norm_grad_optional_params
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
struct batch_norm_grad_optional_params : optional_params {
|
||||
batch_norm_grad_optional_params() : optional_params(KernelType::BATCH_NORM_GRAD) {}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// BatchNormGradKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class BatchNormGradKernelBase : public common_kernel_base {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
virtual ~BatchNormGradKernelBase() {}
|
||||
|
||||
using DispatchData = CommonDispatchData;
|
||||
|
||||
protected:
|
||||
bool Validate(const Params& params, const optional_params& options) const override;
|
||||
KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimatedTime) const;
|
||||
virtual JitConstants GetJitConstants(const batch_norm_grad_params& params) const;
|
||||
virtual DispatchData SetDefault(const batch_norm_grad_params& params) const;
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -1,41 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "batch_norm_grad_kernel_ref.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
// Describes what the reference batch-norm gradient kernel supports: F16/F32/INT8
// data on both input and output, bfyx/byxf/yxfb layouts on both sides, and batching.
ParamsKey BatchNormGradKernelRef::GetSupportedKey() const {
    const Datatype inputTypes[] = {Datatype::F16, Datatype::F32, Datatype::INT8};
    const Datatype outputTypes[] = {Datatype::F32, Datatype::F16, Datatype::INT8};
    const DataLayout layouts[] = {DataLayout::bfyx, DataLayout::byxf, DataLayout::yxfb};

    ParamsKey key;
    for (const auto type : inputTypes) {
        key.EnableInputDataType(type);
    }
    for (const auto type : outputTypes) {
        key.EnableOutputDataType(type);
    }
    for (const auto layout : layouts) {
        key.EnableInputLayout(layout);
    }
    for (const auto layout : layouts) {
        key.EnableOutputLayout(layout);
    }
    key.EnableBatching();
    return key;
}
|
||||
|
||||
// Delegates to the shared builder, passing FORCE_PRIORITY_9 as the estimated time.
KernelsData BatchNormGradKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
    KernelsData kernels = GetCommonKernelsData(params, options, FORCE_PRIORITY_9);
    return kernels;
}
|
||||
} // namespace kernel_selector
|
@ -1,30 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "batch_norm_grad_kernel_base.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
class BatchNormGradKernelRef : public BatchNormGradKernelBase {
|
||||
public:
|
||||
BatchNormGradKernelRef() : BatchNormGradKernelBase("batch_norm_grad_gpu_ref") {}
|
||||
virtual ~BatchNormGradKernelRef() {}
|
||||
|
||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -1,29 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "batch_norm_grad_kernel_selector.h"
|
||||
#include "batch_norm_grad_kernel_ref.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
// Registers the only available implementation: the reference gradient kernel.
batch_norm_grad_kernel_selector::batch_norm_grad_kernel_selector() {
    this->Attach<BatchNormGradKernelRef>();
}
|
||||
|
||||
// Picks the best kernel via GetNaiveBestKernel for KernelType::BATCH_NORM_GRAD.
KernelsData batch_norm_grad_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
    const KernelType selectedType = KernelType::BATCH_NORM_GRAD;
    return GetNaiveBestKernel(params, options, selectedType);
}
|
||||
} // namespace kernel_selector
|
@ -1,35 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "kernel_selector.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
class batch_norm_grad_kernel_selector : public kernel_selector_base {
|
||||
public:
|
||||
static batch_norm_grad_kernel_selector& Instance() {
|
||||
static batch_norm_grad_kernel_selector instance_;
|
||||
return instance_;
|
||||
}
|
||||
|
||||
batch_norm_grad_kernel_selector();
|
||||
|
||||
virtual ~batch_norm_grad_kernel_selector() {}
|
||||
|
||||
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -1,111 +0,0 @@
|
||||
// Copyright (c) 2019 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "contract_kernel_base.h"
|
||||
#include <vector>
|
||||
#include "kernel_selector_utils.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
// Builds the JIT constants for a contract (reduction) kernel.
//
// Axes 0..3 are emitted as B, F, Y, X. Each axis that is not listed in
// params.reduction_axes is assigned an output dimension index, counting down
// from 2 while walking the axes from 3 to 0, and gets a DIM_* constant. Every
// axis also gets a REDUCE_* flag telling whether it is reduced. Finally the
// reduction seed and operation are emitted according to params.mode.
JitConstants ContractKernelBase::GetJitConstants(const contract_params& params) const {
    JitConstants jit = MakeBaseParamsJitConstants(params);

    // Marker stored for axes that are reduced and therefore have no output dimension.
    const size_t reducedMarker = 6;
    std::vector<size_t> axisToOutDim(4, reducedMarker);

    const auto& axes = params.reduction_axes;
    int nextOutDim = 2;
    for (int axis = 3; axis >= 0; --axis) {
        const bool isReduced = std::find(axes.begin(), axes.end(), axis) != axes.end();
        if (!isReduced) {
            axisToOutDim.at(axis) = nextOutDim;
            --nextOutDim;
        }
    }

    if (axisToOutDim[3] != reducedMarker) {
        jit.AddConstants({MakeJitConstant("DIM_X", axisToOutDim.at(3))});
    }
    if (axisToOutDim[2] != reducedMarker) {
        jit.AddConstants({MakeJitConstant("DIM_Y", axisToOutDim.at(2))});
    }
    if (axisToOutDim[1] != reducedMarker) {
        jit.AddConstants({MakeJitConstant("DIM_F", axisToOutDim.at(1))});
    }
    if (axisToOutDim[0] != reducedMarker) {
        jit.AddConstants({MakeJitConstant("DIM_B", axisToOutDim.at(0))});
    }

    jit.AddConstants({
        MakeJitConstant("REDUCE_X", axisToOutDim.at(3) == reducedMarker),
        MakeJitConstant("REDUCE_Y", axisToOutDim.at(2) == reducedMarker),
        MakeJitConstant("REDUCE_F", axisToOutDim.at(1) == reducedMarker),
        MakeJitConstant("REDUCE_B", axisToOutDim.at(0) == reducedMarker),
    });

    // Seed value and binary operation of the reduction; modes not listed here add nothing.
    switch (params.mode) {
        case ContractMode::SUM: {
            jit.AddConstants({MakeJitConstant("REDUCE_SEED", "0"),
                              MakeJitConstant("REDUCE_OPERATION(a, b)", "a + b")});
            break;
        }
        case ContractMode::PRODUCT: {
            jit.AddConstants({MakeJitConstant("REDUCE_SEED", "1"),
                              MakeJitConstant("REDUCE_OPERATION(a, b)", "a * b")});
            break;
        }
        case ContractMode::ALL: {
            jit.AddConstants({MakeJitConstant("REDUCE_SEED", "1"),
                              MakeJitConstant("REDUCE_OPERATION(a, b)", "a && b")});
            break;
        }
        case ContractMode::ANY: {
            jit.AddConstants({MakeJitConstant("REDUCE_SEED", "0"),
                              MakeJitConstant("REDUCE_OPERATION(a, b)", "a || b")});
            break;
        }
        case ContractMode::MAX: {
            jit.AddConstants({MakeJitConstant("REDUCE_SEED", "UNIT_VAL_MIN"),
                              MakeJitConstant("REDUCE_OPERATION(a, b)", "UNIT_MAX_FUNC(a,b)")});
            break;
        }
    }

    return jit;
}
|
||||
|
||||
// Computes the default dispatch sizes: the global size is (feature, y, x) of the
// output tensor and the local size comes from GetOptimalLocalWorkGroupSizes.
ContractKernelBase::DispatchData ContractKernelBase::SetDefault(const contract_params& params) {
    DispatchData dispatch;
    dispatch.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;

    const auto& out = params.output;
    std::vector<size_t> globalSizes{out.Feature().v, out.Y().v, out.X().v};
    const auto& localSizes = GetOptimalLocalWorkGroupSizes(globalSizes, params.engineInfo);

    dispatch.gws0 = globalSizes[0];
    dispatch.lws0 = localSizes[0];

    dispatch.gws1 = globalSizes[1];
    dispatch.lws1 = localSizes[1];

    dispatch.gws2 = globalSizes[2];
    dispatch.lws2 = localSizes[2];

    return dispatch;
}
|
||||
|
||||
// Shared implementation of GetKernelsData for contract kernels.
// Asserts that params is of type CONTRACT, then returns a single KernelData whose
// first kernel is filled from the default dispatch data and the generated JIT,
// with estimatedTime set to the supplied value.
KernelsData ContractKernelBase::GetCommonKernelsData(const Params& params,
                                                     const optional_params& options,
                                                     float estimated_time) const {
    assert(params.GetType() == KernelType::CONTRACT);

    const auto& contractParams = static_cast<const contract_params&>(params);

    auto dispatch = SetDefault(contractParams);
    KernelData kernelData = KernelData::Default<contract_params>(params);

    auto jitConstants = GetJitConstants(contractParams);
    auto entryPoint = GetEntryPoint(kernelName, contractParams.layerID, options);
    auto jitCode = CreateJit(kernelName, jitConstants, entryPoint);

    auto& firstKernel = kernelData.kernels[0];
    FillCLKernelData(firstKernel, dispatch, params.engineInfo, kernelName, jitCode, entryPoint);
    kernelData.estimatedTime = estimated_time;

    return {kernelData};
}
|
||||
} // namespace kernel_selector
|
@ -1,52 +0,0 @@
|
||||
// Copyright (c) 2019 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_selector_params.h"
|
||||
#include <vector>
|
||||
|
||||
namespace kernel_selector {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// contract_params
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
struct contract_params : public base_params {
|
||||
contract_params() : base_params(KernelType::CONTRACT), mode(ContractMode::ANY) {}
|
||||
ContractMode mode;
|
||||
std::vector<uint16_t> reduction_axes;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// contract_optional_params
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
struct contract_optional_params : optional_params {
|
||||
contract_optional_params() : optional_params(KernelType::CONTRACT) {}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// ContractKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class ContractKernelBase : public common_kernel_base {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
|
||||
using DispatchData = CommonDispatchData;
|
||||
|
||||
protected:
|
||||
JitConstants GetJitConstants(const contract_params& params) const;
|
||||
static DispatchData SetDefault(const contract_params& params);
|
||||
KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const;
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -1,49 +0,0 @@
|
||||
// Copyright (c) 2019 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "contract_kernel_ref.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
// Describes what the reference contract kernel accepts: six element types on both input and
// output, bfyx layout on both sides, plus tensor offsets, tensor pitches and batching.
ParamsKey ContractKernelRef::GetSupportedKey() const {
    ParamsKey key;

    // Enabled in the same order as before: all inputs first, then all outputs.
    const Datatype input_types[] = {Datatype::F16,   Datatype::F32,   Datatype::INT8,
                                    Datatype::UINT8, Datatype::INT32, Datatype::INT64};
    for (const Datatype type : input_types) {
        key.EnableInputDataType(type);
    }

    const Datatype output_types[] = {Datatype::F32,   Datatype::F16,   Datatype::INT8,
                                     Datatype::UINT8, Datatype::INT32, Datatype::INT64};
    for (const Datatype type : output_types) {
        key.EnableOutputDataType(type);
    }

    key.EnableInputLayout(DataLayout::bfyx);
    key.EnableOutputLayout(DataLayout::bfyx);

    key.EnableTensorOffset();
    key.EnableTensorPitches();
    key.EnableBatching();

    return key;
}
|
||||
|
||||
// Builds the kernel data through the shared base-class path, passing FORCE_PRIORITY_9 as the
// estimated time.
KernelsData ContractKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
    const float estimated_time = FORCE_PRIORITY_9;
    return GetCommonKernelsData(params, options, estimated_time);
}
|
||||
} // namespace kernel_selector
|
@ -1,27 +0,0 @@
|
||||
// Copyright (c) 2019 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "contract_kernel_base.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
class ContractKernelRef : public ContractKernelBase {
|
||||
public:
|
||||
ContractKernelRef() : ContractKernelBase("contract_ref") {}
|
||||
|
||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -1,24 +0,0 @@
|
||||
// Copyright (c) 2019 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "contract_kernel_selector.h"
|
||||
#include "contract_kernel_ref.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
// Registers the only available contract implementation (the reference kernel) with the selector.
contract_kernel_selector::contract_kernel_selector() {
    Attach<ContractKernelRef>();
}
|
||||
|
||||
// Delegates kernel selection to GetNaiveBestKernel for KernelType::CONTRACT.
KernelsData contract_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
    const auto kernel_type = KernelType::CONTRACT;
    return GetNaiveBestKernel(params, options, kernel_type);
}
|
||||
} // namespace kernel_selector
|
@ -1,31 +0,0 @@
|
||||
// Copyright (c) 2019 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "kernel_selector.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
class contract_kernel_selector : public kernel_selector_base {
|
||||
public:
|
||||
static contract_kernel_selector& Instance() {
|
||||
static contract_kernel_selector instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
contract_kernel_selector();
|
||||
|
||||
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -1,87 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2016-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "convolution_kernel_byx8_f4__fs_bs_yx_bsv4_fsv32.h"
|
||||
#include "kernel_selector_utils.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
// Supported configuration: INT8 data and weights, byx8_f4 input converted to
// fs_bs_yx_bsv4_fsv32 output, with offsets, pitches, dilation, per-feature bias, batching and
// symmetric quantization. Tuning is disabled for this kernel.
ParamsKey ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32::GetSupportedKey() const {
    ParamsKey key;

    // Element types.
    key.EnableInputDataType(Datatype::INT8);
    key.EnableOutputDataType(Datatype::INT8);
    key.EnableInputWeightsType(WeightsType::INT8);

    // Memory layouts.
    key.EnableInputLayout(DataLayout::byx8_f4);
    key.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);

    // Remaining capabilities.
    key.EnableTensorOffset();
    key.EnableTensorPitches();
    key.EnableDilation();
    key.EnableBiasPerFeature();
    key.EnableBatching();
    key.EnableQuantization(QuantizationType::SYMMETRIC);
    key.DisableTuning();

    return key;
}
|
||||
|
||||
bool ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32::Validate(const Params& p, const optional_params& o) const {
|
||||
if (!Parent::Validate(p, o)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t static get_wg_batch_size(const convolution_params& params) {
|
||||
if (params.inputs[0].Batch().v % 64 == 0)
|
||||
return 32;
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Starts from the base-class dispatch data and overrides efficiency and the work sizes.
// The second (unnamed) argument is ignored.
ConvolutionKernelBase::DispatchData ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32::SetDefault(
    const convolution_params& arg,
    int) const {
    DispatchData dispatch = ConvolutionKernelBase::SetDefault(arg);
    dispatch.efficiency = FORCE_PRIORITY_1;

    // Global sizes: batch*feature divided by 8, X divided by 8, Y divided by 2.
    const auto& out = arg.output;
    dispatch.gws0 = (out.Batch().v * out.Feature().v) / (4 * 2);
    dispatch.gws1 = out.X().v / 8;
    dispatch.gws2 = out.Y().v / 2;

    // Local size along dimension 0 scales with the work-group batch size (1 or 32).
    dispatch.lws0 = 8 * get_wg_batch_size(arg);
    dispatch.lws1 = 1;
    dispatch.lws2 = 1;

    return dispatch;
}
|
||||
|
||||
// Extends the base-class JIT constants with WG_BATCH_SIZE, computed from the input batch.
JitConstants ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32::GetJitConstants(const convolution_params& params,
                                                                             const DispatchData& kd) const {
    auto constants = ConvolutionKernelBase::GetJitConstants(params, kd);
    constants.AddConstant(MakeJitConstant("WG_BATCH_SIZE", get_wg_batch_size(params)));
    return constants;
}
|
||||
|
||||
// Builds kernel data through the common path, passing " -Dcl_intel_subgroups_char" as the third
// argument (presumably extra build options - confirm against GetCommonKernelsData), and stamps
// FORCE_PRIORITY_3 as the estimated time on the first entry when one exists.
KernelsData ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32::GetKernelsData(const Params& params,
                                                                           const optional_params& options) const {
    KernelsData kernels = GetCommonKernelsData(params, options, " -Dcl_intel_subgroups_char");
    if (kernels.empty()) {
        return kernels;
    }

    kernels[0].estimatedTime = FORCE_PRIORITY_3;
    return kernels;
}
|
||||
} // namespace kernel_selector
|
@ -1,41 +0,0 @@
|
||||
// Copyright (c) 2016 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "convolution_kernel_base.h"
|
||||
#include <vector>
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
class ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32 : public ConvolutionKernelBase {
|
||||
public:
|
||||
using Parent = ConvolutionKernelBase;
|
||||
ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32()
|
||||
: ConvolutionKernelBase("convolution_gpu_byx8_f4__fs_bs_yx_bsv4_fsv32") {}
|
||||
virtual ~ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32() {}
|
||||
|
||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
|
||||
protected:
|
||||
bool Validate(const Params& p, const optional_params& o) const override;
|
||||
JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
|
||||
ConvolutionKernelBase::DispatchData SetDefault(const convolution_params& arg, int) const override;
|
||||
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override {
|
||||
return WeightsLayout::os_is_y_x8_osv8_isv4_swizzled_by_4;
|
||||
}
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -1,61 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2016-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "convolution_kernel_byxf_fs_bs_yx_bsv4_fsv32.h"
|
||||
#include "kernel_selector_utils.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
// Supported configuration: INT8 data and weights, byxf input to fs_bs_yx_bsv4_fsv32 output,
// with offsets, pitches, per-feature bias or no bias, batching and symmetric quantization.
// Tuning is disabled for this kernel.
ParamsKey ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32::GetSupportedKey() const {
    ParamsKey key;

    // Element types.
    key.EnableInputDataType(Datatype::INT8);
    key.EnableOutputDataType(Datatype::INT8);
    key.EnableInputWeightsType(WeightsType::INT8);

    // Memory layouts.
    key.EnableInputLayout(DataLayout::byxf);
    key.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);

    // Remaining capabilities.
    key.EnableTensorOffset();
    key.EnableTensorPitches();
    key.EnableBiasPerFeature();
    key.EnableNonBiasTerm();
    key.EnableBatching();
    key.EnableQuantization(QuantizationType::SYMMETRIC);
    key.DisableTuning();

    return key;
}
|
||||
|
||||
// Starts from the base-class dispatch data and overrides efficiency and the work sizes.
// The second (unnamed) argument is ignored.
ConvolutionKernelBase::DispatchData ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32::SetDefault(
    const convolution_params& arg,
    int) const {
    DispatchData dispatch = ConvolutionKernelBase::SetDefault(arg);
    dispatch.efficiency = FORCE_PRIORITY_1;

    // Global sizes: batch*feature divided by 4, X divided by 8, full Y.
    const auto& out = arg.output;
    dispatch.gws0 = (out.Batch().v * out.Feature().v) / 4;
    dispatch.gws1 = out.X().v / 8;
    dispatch.gws2 = out.Y().v;

    // Fixed 8x1x1 local size.
    dispatch.lws0 = 8;
    dispatch.lws1 = 1;
    dispatch.lws2 = 1;

    return dispatch;
}
|
||||
|
||||
// Thin forwarder: kernel data comes straight from the common base-class path.
KernelsData ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32::GetKernelsData(const Params& params,
                                                                       const optional_params& options) const {
    KernelsData kernels = GetCommonKernelsData(params, options);
    return kernels;
}
|
||||
} // namespace kernel_selector
|
@ -1,37 +0,0 @@
|
||||
// Copyright (c) 2016 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "convolution_kernel_base.h"
|
||||
#include <vector>
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
class ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32 : public ConvolutionKernelBase {
|
||||
public:
|
||||
ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32() : ConvolutionKernelBase("convolution_gpu_byxf_fs_bs_yx_bsv4_fsv32") {}
|
||||
virtual ~ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32() {}
|
||||
|
||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
|
||||
protected:
|
||||
ConvolutionKernelBase::DispatchData SetDefault(const convolution_params& arg, int) const override;
|
||||
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override {
|
||||
return WeightsLayout::yxio;
|
||||
}
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -1,108 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2016-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "convolution_kernel_mmad_1x1_gemm.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
// Supported configuration: INT8 data and weights in byxf_af32 layout on both sides, with
// offsets, pitches, dilation, every bias mode, batching, split, depthwise-separable
// optimization and symmetric quantization. Tuning is disabled for this kernel.
ParamsKey ConvolutionKernel_mmad_1x1_gemm::GetSupportedKey() const {
    ParamsKey key;

    // Element types.
    key.EnableInputDataType(Datatype::INT8);
    key.EnableOutputDataType(Datatype::INT8);
    key.EnableInputWeightsType(WeightsType::INT8);

    // Memory layouts.
    key.EnableInputLayout(DataLayout::byxf_af32);
    key.EnableOutputLayout(DataLayout::byxf_af32);

    // Tensor addressing and dilation.
    key.EnableTensorOffset();
    key.EnableTensorPitches();
    key.EnableDilation();

    // Bias handling.
    key.EnableBiasPerFeature();
    key.EnableBiasPerOutput();
    key.EnableNonBiasTerm();

    // Remaining capabilities.
    key.EnableBatching();
    key.EnableSplitSupport();
    key.EnableDepthwiseSeparableOpt();
    key.EnableQuantization(QuantizationType::SYMMETRIC);
    key.DisableTuning();

    return key;
}
|
||||
|
||||
bool ConvolutionKernel_mmad_1x1_gemm::Validate(const Params& p, const optional_params& o) const {
|
||||
if (!ConvolutionKernelBase::Validate(p, o)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto& params = static_cast<const convolution_params&>(p);
|
||||
|
||||
if (params.filterSize.x != 1 || params.filterSize.y != 1)
|
||||
return false;
|
||||
|
||||
if (params.stride.x != 1 || params.stride.y != 1)
|
||||
return false;
|
||||
|
||||
if (params.padding.x != 0 || params.padding.y != 0)
|
||||
return false;
|
||||
|
||||
const auto& input = params.inputs[0];
|
||||
|
||||
// we do not support padded input
|
||||
if (input.X().pad.Total() != 0 || input.Y().pad.Total() != 0)
|
||||
return false;
|
||||
|
||||
if (params.split != 1)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Starts from the base-class dispatch data and overrides efficiency and the work sizes.
// The second (unnamed) argument is ignored.
ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_1x1_gemm::SetDefault(const convolution_params& arg, int) const {
    DispatchData dispatch = ConvolutionKernelBase::SetDefault(arg);
    dispatch.efficiency = FORCE_PRIORITY_2;

    // Sub-group size used by "convolution_1x1_gemm_MMAD" kernel.
    constexpr size_t sub_group_size = 8;

    // Output feature count rounded up to a whole number of sub-groups.
    const auto output_features = arg.output.Feature().v;
    const size_t feature_threads_per_batch = RoundUp(output_features, sub_group_size);

    // Dimension 0 covers the X*Y plane in groups of 8; dimension 1 covers features for every batch.
    dispatch.gws0 = RoundUp(arg.output.X().v * arg.output.Y().v, 8) / 8;
    dispatch.gws1 = feature_threads_per_batch * arg.output.Batch().v;
    dispatch.gws2 = 1;

    dispatch.lws0 = 1;
    dispatch.lws1 = sub_group_size;
    dispatch.lws2 = 1;

    return dispatch;
}
|
||||
|
||||
// Extends the parent JIT constants with SUB_GROUP_SIZE (taken from lws1) and
// FILTER_OFM_BLOCK_PITCH for the blocked weights format used by this kernel.
JitConstants ConvolutionKernel_mmad_1x1_gemm::GetJitConstants(const convolution_params& params, const DispatchData& runInfo) const {
    auto constants = Parent::GetJitConstants(params, runInfo);

    constants.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws1));

    // Pitch of one OFM block: input features are aligned up to 32 and counted in groups of 32,
    // then scaled by the filter area and the fixed 4 * 8 * 8 factor.
    const size_t aligned_ifm = Align(params.weights.IFM().v, 32);
    const size_t ifm_groups = aligned_ifm / 32;
    const size_t ofm_block_pitch = ifm_groups * params.weights.X().v * params.weights.Y().v * 4 * 8 * 8;
    constants.AddConstant(MakeJitConstant("FILTER_OFM_BLOCK_PITCH", ofm_block_pitch));

    return constants;
}
|
||||
|
||||
// Thin forwarder: kernel data comes from GetTunedKernelsDataByIndex with its default index.
KernelsData ConvolutionKernel_mmad_1x1_gemm::GetKernelsData(const Params& params, const optional_params& options) const {
    KernelsData kernels = GetTunedKernelsDataByIndex(params, options);
    return kernels;
}
|
||||
} // namespace kernel_selector
|
@ -1,40 +0,0 @@
|
||||
// Copyright (c) 2016 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "convolution_kernel_base.h"
|
||||
#include <vector>
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
class ConvolutionKernel_mmad_1x1_gemm : public ConvolutionKernelBase {
|
||||
public:
|
||||
using Parent = ConvolutionKernelBase;
|
||||
ConvolutionKernel_mmad_1x1_gemm() : ConvolutionKernelBase("convolution_gpu_1x1_gemm_MMAD") {}
|
||||
virtual ~ConvolutionKernel_mmad_1x1_gemm() {}
|
||||
|
||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
|
||||
protected:
|
||||
JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
|
||||
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
|
||||
bool Validate(const Params& p, const optional_params& o) const override;
|
||||
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override {
|
||||
return WeightsLayout::os_is_yx_isa8_osv8_isv4;
|
||||
}
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -1,180 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "convolution_kernel_mmad_32x32sg_128x128wg_slm_int8.h"
|
||||
#include "kernel_selector_utils.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
// File-local tiling constants for this kernel.
// NOTE(review): names starting with an underscore followed by an uppercase letter are reserved
// identifiers in C++; renaming them means touching every use in this file.
static constexpr size_t _SG_TILE_M = 32;       // sub-group tile size along M
static constexpr size_t _SG_TILE_N = 32;       // sub-group tile size along N
static constexpr size_t _SG_SIZE = 8;          // sub group size
static constexpr size_t _TILES_PER_SG_X = 1;   // Persistent threads
static constexpr size_t _TILES_PER_SG_Y = 1;   // Persistent threads
|
||||
|
||||
// Supported configuration: INT8 data and weights in fs_bs_yx_bsv4_fsv32 layout on both sides,
// with offsets, pitches, per-feature bias, batching and symmetric quantization.
// Tuning is disabled for this kernel.
ParamsKey ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8::GetSupportedKey() const {
    ParamsKey key;

    // Element types.
    key.EnableInputDataType(Datatype::INT8);
    key.EnableOutputDataType(Datatype::INT8);
    key.EnableInputWeightsType(WeightsType::INT8);

    // Memory layouts.
    key.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
    key.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);

    // Remaining capabilities.
    key.EnableTensorOffset();
    key.EnableTensorPitches();
    key.EnableBiasPerFeature();
    key.EnableBatching();
    key.EnableQuantization(QuantizationType::SYMMETRIC);
    key.DisableTuning();

    return key;
}
|
||||
|
||||
bool ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8::Validate(const Params& p, const optional_params& o) const {
|
||||
if (!ConvolutionKernelBase::Validate(p, o) || !CovolutionCheckInput(p, o)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const convolution_params& cp = static_cast<const convolution_params&>(p);
|
||||
|
||||
// make sure it's 1x1 conv
|
||||
if (cp.filterSize.x != 1 || cp.filterSize.y != 1)
|
||||
return false;
|
||||
|
||||
// make sure stride is 1x1
|
||||
if (cp.stride.x != 1 || cp.stride.y != 1)
|
||||
return false;
|
||||
|
||||
// input padding not supported
|
||||
if (cp.inputs[0].X().pad.Total() != 0 || cp.inputs[0].Y().pad.Total() != 0 ||
|
||||
cp.inputs[0].Feature().pad.Total() != 0 || cp.inputs[0].Batch().pad.Total() != 0)
|
||||
return false;
|
||||
|
||||
// input and output spatial sizes must match
|
||||
if (!(cp.output.X().v == cp.inputs[0].X().v) || !(cp.output.Y().v == cp.inputs[0].Y().v))
|
||||
return false;
|
||||
|
||||
const auto m = cp.output.X().v * cp.output.Y().v * cp.output.Batch().v;
|
||||
const auto k = cp.inputs[0].Feature().v;
|
||||
const auto n = cp.output.Feature().v;
|
||||
|
||||
if (m % 32 != 0 && m % 128 != 0) // Matrix size M, Must be mutliple of 32 and multiple of WG_TILE_M=128
|
||||
return false;
|
||||
|
||||
if (k % 32 != 0) // Matrix size K, Must be mutliple of 32
|
||||
return false;
|
||||
|
||||
if (n % 32 != 0 && n % 128 != 0) // Matrix size N, Must be mutliple of 32 and multiple of WG_TILE_N=128
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Starts from the base-class dispatch data and derives the work sizes from the output shape and
// the file-local tiling constants. The second (unnamed) argument is ignored.
ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8::SetDefault(
    const convolution_params& arg,
    int) const {
    DispatchData dispatch = ConvolutionKernelBase::SetDefault(arg);
    dispatch.efficiency = FORCE_PRIORITY_1;

    // Matrix dimensions: M spans X * Y * batch of the output, N spans the output features.
    const size_t matrix_m = arg.output.X().v * arg.output.Y().v * arg.output.Batch().v;
    const size_t matrix_n = arg.output.Feature().v;

    // Work-group tile sizes.
    const size_t wg_tile_m = 128;
    const size_t wg_tile_n = 128;

    // Calculate number of threads needed
    dispatch.gws0 = (matrix_n / (_SG_TILE_N / _SG_SIZE)) / _TILES_PER_SG_X;
    dispatch.gws1 = (matrix_m / _SG_TILE_M) / _TILES_PER_SG_Y;
    dispatch.gws2 = 1;

    // Local size: sub-group lanes times N tiles per work-group, by M tiles per work-group.
    dispatch.lws0 = _SG_SIZE * wg_tile_n / _SG_TILE_N;
    dispatch.lws1 = wg_tile_m / _SG_TILE_M;
    dispatch.lws2 = 1;

    return dispatch;
}
|
||||
|
||||
// Extends the parent JIT constants with the tiling configuration, the matrix sizes derived from
// the tensor shapes, and the output pitches/offset for the blocked output layout.
JitConstants ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8::GetJitConstants(const convolution_params& params,
                                                                                const DispatchData& runInfo) const {
    auto constants = Parent::GetJitConstants(params, runInfo);

    // Tunable tiling parameters.
    constants.AddConstant(MakeJitConstant("WG_TILE_M", 128));      // Work-Group tile size M, must be a multiple of 32
    constants.AddConstant(MakeJitConstant("WG_TILE_N", 128));      // Work-Group tile size N, must be a multiple of 32
    constants.AddConstant(MakeJitConstant("TILES_PER_SG_X", 1));   // Persistent threads
    constants.AddConstant(MakeJitConstant("TILES_PER_SG_Y", 1));   // Persistent threads

    // Do not change values below
    constants.AddConstant(MakeJitConstant("DIM_X", 0));
    constants.AddConstant(MakeJitConstant("DIM_Y", 1));
    constants.AddConstant(MakeJitConstant("MATRIX_SMALL_K", 32));
    constants.AddConstant(MakeJitConstant("MATRIX_SMALL_K_BFLOAT", 16));
    constants.AddConstant(MakeJitConstant("SG_TILE_M", _SG_TILE_M));
    constants.AddConstant(MakeJitConstant("SG_TILE_N", _SG_TILE_N));
    constants.AddConstant(MakeJitConstant("SG_SIZE", _SG_SIZE));
    constants.AddConstant(MakeJitConstant("SIMD_LANE_M", "SG_TILE_M"));
    constants.AddConstant(MakeJitConstant("SIMD_LANE_N", "(SG_TILE_N / SG_SIZE)"));
    constants.AddConstant(MakeJitConstant("WG_SIZE", "(SG_SIZE * WG_TILE_N / SG_TILE_N) * (WG_TILE_M / SG_TILE_M)"));

    // Flags emitted with an empty value.
    constants.AddConstant(MakeJitConstant("COMPILE_KERNELS", ""));
    constants.AddConstant(MakeJitConstant("TILED_GLOBAL_LAYOUT", ""));
    constants.AddConstant(MakeJitConstant("OUTPUT_TILED_GLOBAL_LAYOUT", ""));

    // Matrix sizes: M from the output X * Y * batch, K from input features, N from output features.
    const auto& out = params.output;
    auto matrix_m = out.X().v * out.Y().v * out.Batch().v;
    auto matrix_k = params.inputs[0].Feature().v;
    auto matrix_n = out.Feature().v;

    constants.AddConstant(MakeJitConstant("MATRIX_M", matrix_m));
    constants.AddConstant(MakeJitConstant("MATRIX_K", matrix_k));
    constants.AddConstant(MakeJitConstant("MATRIX_N", matrix_n));

    // Output pitches: each X step covers 32 * 4 elements; batches are grouped in blocks of 4.
    const size_t x_pitch = 32 * 4;
    const size_t y_pitch = 32 * 4 * params.output.X().LogicalDimPadded();
    const size_t b_block_pitch = y_pitch * params.output.Y().LogicalDimPadded();
    const size_t f_block_pitch = b_block_pitch * ((params.output.Batch().v + 3) / 4);
    const size_t first_element_offset =
        x_pitch * params.output.X().pad.before + y_pitch * params.output.Y().pad.before;

    constants.AddConstant(MakeJitConstant("OUT_X_PITCH", x_pitch));
    constants.AddConstant(MakeJitConstant("OUT_Y_PITCH", y_pitch));
    constants.AddConstant(MakeJitConstant("OUT_B_BLOCK_PITCH", b_block_pitch));
    constants.AddConstant(MakeJitConstant("OUT_F_BLOCK_PITCH", f_block_pitch));
    constants.AddConstant(MakeJitConstant("OUT_OFFSET", first_element_offset));

    // True when the output carries any X or Y padding.
    bool has_output_padding = out.X().pad.Total() != 0 || out.Y().pad.Total() != 0;
    constants.AddConstant(MakeJitConstant("OUT_WITH_PADDING", has_output_padding));

    return constants;
}
|
||||
|
||||
// Builds kernel data through the common path and stamps FORCE_PRIORITY_1 as the estimated time
// on the first entry when one exists.
KernelsData ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8::GetKernelsData(const Params& params,
                                                                              const optional_params& options) const {
    KernelsData kernels = GetCommonKernelsData(params, options);
    if (kernels.empty()) {
        return kernels;
    }

    kernels[0].estimatedTime = FORCE_PRIORITY_1;  // _3
    return kernels;
}
|
||||
} // namespace kernel_selector
|
@ -1,42 +0,0 @@
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "convolution_kernel_base.h"
|
||||
#include <vector>
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
class ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8 : public ConvolutionKernelBase {
|
||||
public:
|
||||
using Parent = ConvolutionKernelBase;
|
||||
ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8()
|
||||
: ConvolutionKernelBase("convolution_gpu_mmad_32x32sg_128x128wg_slm_int8") {}
|
||||
|
||||
virtual ~ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8() {}
|
||||
|
||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
|
||||
protected:
|
||||
bool Validate(const Params& p, const optional_params& o) const override;
|
||||
JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
|
||||
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
|
||||
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override {
|
||||
return WeightsLayout::is_o32_yx_isv32_swizzled_by_4;
|
||||
}
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -1,180 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "convolution_kernel_mmad_32x32sg_224x128wg_slm_int8.h"
|
||||
#include "kernel_selector_utils.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
// File-local tiling constants for this kernel.
// NOTE(review): names starting with an underscore followed by an uppercase letter are reserved
// identifiers in C++; renaming them means touching every use in this file.
static constexpr size_t _SG_TILE_M = 32;       // sub-group tile size along M
static constexpr size_t _SG_TILE_N = 32;       // sub-group tile size along N
static constexpr size_t _SG_SIZE = 8;          // sub group size
static constexpr size_t _TILES_PER_SG_X = 1;   // Persistent threads
static constexpr size_t _TILES_PER_SG_Y = 1;   // Persistent threads
|
||||
|
||||
// Supported configuration: INT8 data and weights in fs_bs_yx_bsv4_fsv32 layout on both sides,
// with offsets, pitches, per-feature bias, batching and symmetric quantization.
// Tuning is disabled for this kernel.
ParamsKey ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8::GetSupportedKey() const {
    ParamsKey key;

    // Element types.
    key.EnableInputDataType(Datatype::INT8);
    key.EnableOutputDataType(Datatype::INT8);
    key.EnableInputWeightsType(WeightsType::INT8);

    // Memory layouts.
    key.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
    key.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);

    // Remaining capabilities.
    key.EnableTensorOffset();
    key.EnableTensorPitches();
    key.EnableBiasPerFeature();
    key.EnableBatching();
    key.EnableQuantization(QuantizationType::SYMMETRIC);
    key.DisableTuning();

    return key;
}
|
||||
|
||||
bool ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8::Validate(const Params& p, const optional_params& o) const {
|
||||
if (!ConvolutionKernelBase::Validate(p, o) || !CovolutionCheckInput(p, o)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const convolution_params& cp = static_cast<const convolution_params&>(p);
|
||||
|
||||
// make sure it's 1x1 conv
|
||||
if (cp.filterSize.x != 1 || cp.filterSize.y != 1)
|
||||
return false;
|
||||
|
||||
// make sure stride is 1x1
|
||||
if (cp.stride.x != 1 || cp.stride.y != 1)
|
||||
return false;
|
||||
|
||||
// input padding not supported
|
||||
if (cp.inputs[0].X().pad.Total() != 0 || cp.inputs[0].Y().pad.Total() != 0 ||
|
||||
cp.inputs[0].Feature().pad.Total() != 0 || cp.inputs[0].Batch().pad.Total() != 0)
|
||||
return false;
|
||||
|
||||
// input and output spatial sizes must match
|
||||
if (!(cp.output.X().v == cp.inputs[0].X().v) || !(cp.output.Y().v == cp.inputs[0].Y().v))
|
||||
return false;
|
||||
|
||||
const auto m = cp.output.X().v * cp.output.Y().v * cp.output.Batch().v;
|
||||
const auto k = cp.inputs[0].Feature().v;
|
||||
const auto n = cp.output.Feature().v;
|
||||
|
||||
if (m % 32 != 0 && m % 224 != 0) // Matrix size M, Must be mutliple of 32 and multiple of WG_TILE_M=128
|
||||
return false;
|
||||
|
||||
if (k % 32 != 0) // Matrix size K, Must be mutliple of 32
|
||||
return false;
|
||||
|
||||
if (n % 32 != 0 && n % 128 != 0) // Matrix size N, Must be mutliple of 32 and multiple of WG_TILE_N=128
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Produces the dispatch configuration (global/local work sizes and priority) for this kernel.
// The auto-tune index argument is ignored.
ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8::SetDefault(
    const convolution_params& arg,
    int) const {
    DispatchData dispatch = ConvolutionKernelBase::SetDefault(arg);
    dispatch.efficiency = FORCE_PRIORITY_1;

    // The convolution is viewed as a matrix product: M = X * Y * Batch, N = output features.
    const size_t matrix_m = arg.output.X().v * arg.output.Y().v * arg.output.Batch().v;
    const size_t matrix_n = arg.output.Feature().v;

    // Work-group tile handled by one work-group.
    const size_t wg_tile_m = 224;
    const size_t wg_tile_n = 128;

    // Global size: number of threads needed along each dimension.
    dispatch.gws0 = (matrix_n / (_SG_TILE_N / _SG_SIZE)) / _TILES_PER_SG_X;
    dispatch.gws1 = (matrix_m / _SG_TILE_M) / _TILES_PER_SG_Y;
    dispatch.gws2 = 1;

    // Local size: sub-groups covering one work-group tile.
    dispatch.lws0 = _SG_SIZE * wg_tile_n / _SG_TILE_N;
    dispatch.lws1 = wg_tile_m / _SG_TILE_M;
    dispatch.lws2 = 1;

    return dispatch;
}
|
||||
|
||||
// Extends the parent's JIT constants with the tiling, matrix-size and output-pitch
// definitions emitted for this kernel.
JitConstants ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8::GetJitConstants(const convolution_params& params,
                                                                                const DispatchData& runInfo) const {
    auto constants = Parent::GetJitConstants(params, runInfo);

    constants.AddConstant(MakeJitConstant("WG_TILE_M", 224));  // Work-Group tile size M, Must be multiple of 32
    constants.AddConstant(MakeJitConstant("WG_TILE_N", 128));  // Work-Group tile size N, Must be multiple of 32
    constants.AddConstant(MakeJitConstant("TILES_PER_SG_X", _TILES_PER_SG_X));
    constants.AddConstant(MakeJitConstant("TILES_PER_SG_Y", _TILES_PER_SG_Y));

    // Do not change values below
    constants.AddConstant(MakeJitConstant("DIM_X", 0));
    constants.AddConstant(MakeJitConstant("DIM_Y", 1));
    constants.AddConstant(MakeJitConstant("MATRIX_SMALL_K", 32));
    constants.AddConstant(MakeJitConstant("MATRIX_SMALL_K_BFLOAT", 16));
    constants.AddConstant(MakeJitConstant("SG_TILE_M", _SG_TILE_M));
    constants.AddConstant(MakeJitConstant("SG_TILE_N", _SG_TILE_N));
    constants.AddConstant(MakeJitConstant("SG_SIZE", _SG_SIZE));
    constants.AddConstant(MakeJitConstant("SIMD_LANE_M", "SG_TILE_M"));
    constants.AddConstant(MakeJitConstant("SIMD_LANE_N", "(SG_TILE_N / SG_SIZE)"));
    constants.AddConstant(MakeJitConstant("WG_SIZE", "(SG_SIZE * WG_TILE_N / SG_TILE_N) * (WG_TILE_M / SG_TILE_M)"));

    constants.AddConstant(MakeJitConstant("COMPILE_KERNELS", ""));
    constants.AddConstant(MakeJitConstant("TILED_GLOBAL_LAYOUT", ""));
    constants.AddConstant(MakeJitConstant("OUTPUT_TILED_GLOBAL_LAYOUT", ""));

    const auto& in = params.inputs[0];
    const auto& out = params.output;

    // Matrix dimensions of the equivalent matrix product.
    const auto matrix_m = out.X().v * out.Y().v * out.Batch().v;
    const auto matrix_k = in.Feature().v;
    const auto matrix_n = out.Feature().v;

    constants.AddConstant(MakeJitConstant("MATRIX_M", matrix_m));  // Matrix size M, Must be multiple of 32 and multiple of WG_TILE_M
    constants.AddConstant(MakeJitConstant("MATRIX_K", matrix_k));  // Matrix size K, Must be multiple of 32
    constants.AddConstant(MakeJitConstant("MATRIX_N", matrix_n));  // Matrix size N, Must be multiple of 32 and multiple of WG_TILE_N

    // Output pitches: each one is built from the previous, starting at 32 * 4 elements per x step.
    const size_t x_pitch = 32 * 4;
    const size_t y_pitch = 32 * 4 * out.X().LogicalDimPadded();
    const size_t b_block_pitch = y_pitch * out.Y().LogicalDimPadded();
    const size_t f_block_pitch = b_block_pitch * ((out.Batch().v + 3) / 4);
    const size_t first_element = x_pitch * out.X().pad.before + y_pitch * out.Y().pad.before;

    constants.AddConstant(MakeJitConstant("OUT_X_PITCH", x_pitch));
    constants.AddConstant(MakeJitConstant("OUT_Y_PITCH", y_pitch));
    constants.AddConstant(MakeJitConstant("OUT_B_BLOCK_PITCH", b_block_pitch));
    constants.AddConstant(MakeJitConstant("OUT_F_BLOCK_PITCH", f_block_pitch));
    constants.AddConstant(MakeJitConstant("OUT_OFFSET", first_element));

    // True when the output has any padding along X or Y.
    const bool has_out_padding = out.X().pad.Total() != 0 || out.Y().pad.Total() != 0;
    constants.AddConstant(MakeJitConstant("OUT_WITH_PADDING", has_out_padding));

    return constants;
}
|
||||
|
||||
// Returns the common kernels data; when it is non-empty, the first entry's estimated time
// is overridden with FORCE_PRIORITY_1.
KernelsData ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8::GetKernelsData(const Params& params,
                                                                              const optional_params& options) const {
    KernelsData kernels = GetCommonKernelsData(params, options);
    if (kernels.empty()) {
        return kernels;
    }

    kernels[0].estimatedTime = FORCE_PRIORITY_1;  // _3
    return kernels;
}
|
||||
} // namespace kernel_selector
|
@ -1,42 +0,0 @@
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "convolution_kernel_base.h"
|
||||
#include <vector>
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
class ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8 : public ConvolutionKernelBase {
|
||||
public:
|
||||
using Parent = ConvolutionKernelBase;
|
||||
ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8()
|
||||
: ConvolutionKernelBase("convolution_gpu_mmad_32x32sg_224x128wg_slm_int8") {}
|
||||
|
||||
virtual ~ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8() {}
|
||||
|
||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
|
||||
protected:
|
||||
bool Validate(const Params& p, const optional_params& o) const override;
|
||||
JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
|
||||
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
|
||||
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override {
|
||||
return WeightsLayout::is_o32_yx_isv32_swizzled_by_4;
|
||||
}
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -1,176 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "convolution_kernel_mmad_32x32sg_slm_int8.h"
|
||||
#include "kernel_selector_utils.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
// Sub-group tiling configuration used by the dispatch setup and the JIT constants in this file.
static constexpr size_t _SG_TILE_M = 32;      // sub-group tile size along M
static constexpr size_t _SG_TILE_N = 32;      // sub-group tile size along N
static constexpr size_t _SG_SIZE = 8;         // sub group size
static constexpr size_t _TILES_PER_SG_X = 1;  // Persistent threads
static constexpr size_t _TILES_PER_SG_Y = 1;  // Persistent threads
|
||||
|
||||
// Builds the key describing the parameter combinations this kernel accepts.
ParamsKey ConvolutionKernel_mmad_32x32sg_slm_int8::GetSupportedKey() const {
    ParamsKey key;

    // Data types: int8 input, output and weights.
    key.EnableInputDataType(Datatype::INT8);
    key.EnableOutputDataType(Datatype::INT8);
    key.EnableInputWeightsType(WeightsType::INT8);

    // Layouts: fs_bs_yx_bsv4_fsv32 on both sides.
    key.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
    key.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);

    // Remaining capabilities.
    key.EnableTensorOffset();
    key.EnableTensorPitches();
    key.EnableBiasPerFeature();
    key.EnableBatching();
    key.EnableQuantization(QuantizationType::SYMMETRIC);
    key.DisableTuning();

    return key;
}
|
||||
|
||||
bool ConvolutionKernel_mmad_32x32sg_slm_int8::Validate(const Params& p, const optional_params& o) const {
|
||||
if (!ConvolutionKernelBase::Validate(p, o) || !CovolutionCheckInput(p, o)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const convolution_params& cp = static_cast<const convolution_params&>(p);
|
||||
|
||||
// make sure it's 1x1 conv
|
||||
if (cp.filterSize.x != 1 || cp.filterSize.y != 1)
|
||||
return false;
|
||||
|
||||
// make sure stride is 1x1
|
||||
if (cp.stride.x != 1 || cp.stride.y != 1)
|
||||
return false;
|
||||
|
||||
// input padding not supported
|
||||
if (cp.inputs[0].X().pad.Total() != 0 || cp.inputs[0].Y().pad.Total() != 0 ||
|
||||
cp.inputs[0].Feature().pad.Total() != 0 || cp.inputs[0].Batch().pad.Total() != 0)
|
||||
return false;
|
||||
|
||||
// input and output spatial sizes must match
|
||||
if (!(cp.output.X().v == cp.inputs[0].X().v) || !(cp.output.Y().v == cp.inputs[0].Y().v))
|
||||
return false;
|
||||
|
||||
const auto m = cp.output.X().v * cp.output.Y().v * cp.output.Batch().v;
|
||||
const auto k = cp.inputs[0].Feature().v;
|
||||
const auto n = cp.output.Feature().v;
|
||||
|
||||
if (m % 32 != 0) // Matrix size M, Must be mutliple of 32
|
||||
return false;
|
||||
|
||||
if (k % 32 != 0) // Matrix size K, Must be multiple of 32
|
||||
return false;
|
||||
|
||||
if (n % 32 != 0) // Matrix size N, Must be mutliple of 32
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Produces the dispatch configuration (global/local work sizes and priority) for this kernel.
// The auto-tune index argument is ignored.
ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_32x32sg_slm_int8::SetDefault(const convolution_params& arg,
                                                                                        int) const {
    DispatchData dispatch = ConvolutionKernelBase::SetDefault(arg);
    dispatch.efficiency = FORCE_PRIORITY_2;

    // The convolution is viewed as a matrix product: M = X * Y * Batch, N = output features.
    const size_t matrix_m = arg.output.X().v * arg.output.Y().v * arg.output.Batch().v;
    const size_t matrix_n = arg.output.Feature().v;

    // Work-group tile handled by one work-group.
    const size_t wg_tile_m = 32;
    const size_t wg_tile_n = 32;

    // Global size: number of threads needed along each dimension.
    dispatch.gws0 = (matrix_n / (_SG_TILE_N / _SG_SIZE)) / _TILES_PER_SG_X;
    dispatch.gws1 = (matrix_m / _SG_TILE_M) / _TILES_PER_SG_Y;
    dispatch.gws2 = 1;

    // Local size: sub-groups covering one work-group tile.
    dispatch.lws0 = _SG_SIZE * wg_tile_n / _SG_TILE_N;
    dispatch.lws1 = wg_tile_m / _SG_TILE_M;
    dispatch.lws2 = 1;

    return dispatch;
}
|
||||
|
||||
// Extends the parent's JIT constants with the tiling, matrix-size and output-pitch
// definitions emitted for this kernel.
JitConstants ConvolutionKernel_mmad_32x32sg_slm_int8::GetJitConstants(const convolution_params& params,
                                                                      const DispatchData& runInfo) const {
    auto constants = Parent::GetJitConstants(params, runInfo);

    constants.AddConstant(MakeJitConstant("WG_TILE_M", 32));  // Work-Group tile size M, Must be multiple of 32
    constants.AddConstant(MakeJitConstant("WG_TILE_N", 32));  // Work-Group tile size N, Must be multiple of 32
    constants.AddConstant(MakeJitConstant("TILES_PER_SG_X", _TILES_PER_SG_X));
    constants.AddConstant(MakeJitConstant("TILES_PER_SG_Y", _TILES_PER_SG_Y));

    // Do not change values below
    constants.AddConstant(MakeJitConstant("DIM_X", 0));
    constants.AddConstant(MakeJitConstant("DIM_Y", 1));
    constants.AddConstant(MakeJitConstant("MATRIX_SMALL_K", 32));
    constants.AddConstant(MakeJitConstant("MATRIX_SMALL_K_BFLOAT", 16));
    constants.AddConstant(MakeJitConstant("SG_TILE_M", _SG_TILE_M));
    constants.AddConstant(MakeJitConstant("SG_TILE_N", _SG_TILE_N));
    constants.AddConstant(MakeJitConstant("SG_SIZE", _SG_SIZE));
    constants.AddConstant(MakeJitConstant("SIMD_LANE_M", "SG_TILE_M"));
    constants.AddConstant(MakeJitConstant("SIMD_LANE_N", "(SG_TILE_N / SG_SIZE)"));
    constants.AddConstant(MakeJitConstant("WG_SIZE", "(SG_SIZE * WG_TILE_N / SG_TILE_N) * (WG_TILE_M / SG_TILE_M)"));

    constants.AddConstant(MakeJitConstant("COMPILE_KERNELS", ""));
    constants.AddConstant(MakeJitConstant("TILED_GLOBAL_LAYOUT", ""));
    constants.AddConstant(MakeJitConstant("OUTPUT_TILED_GLOBAL_LAYOUT", ""));

    const auto& in = params.inputs[0];
    const auto& out = params.output;

    // Matrix dimensions of the equivalent matrix product.
    const auto matrix_m = out.X().v * out.Y().v * out.Batch().v;
    const auto matrix_k = in.Feature().v;
    const auto matrix_n = out.Feature().v;

    constants.AddConstant(MakeJitConstant("MATRIX_M", matrix_m));  // Matrix size M, Must be multiple of 32 and multiple of WG_TILE_M
    constants.AddConstant(MakeJitConstant("MATRIX_K", matrix_k));  // Matrix size K, Must be multiple of 32
    constants.AddConstant(MakeJitConstant("MATRIX_N", matrix_n));  // Matrix size N, Must be multiple of 32 and multiple of WG_TILE_N

    // Output pitches: each one is built from the previous, starting at 32 * 4 elements per x step.
    const size_t x_pitch = 32 * 4;
    const size_t y_pitch = 32 * 4 * out.X().LogicalDimPadded();
    const size_t b_block_pitch = y_pitch * out.Y().LogicalDimPadded();
    const size_t f_block_pitch = b_block_pitch * ((out.Batch().v + 3) / 4);
    const size_t first_element = x_pitch * out.X().pad.before + y_pitch * out.Y().pad.before;

    constants.AddConstant(MakeJitConstant("OUT_X_PITCH", x_pitch));
    constants.AddConstant(MakeJitConstant("OUT_Y_PITCH", y_pitch));
    constants.AddConstant(MakeJitConstant("OUT_B_BLOCK_PITCH", b_block_pitch));
    constants.AddConstant(MakeJitConstant("OUT_F_BLOCK_PITCH", f_block_pitch));
    constants.AddConstant(MakeJitConstant("OUT_OFFSET", first_element));

    return constants;
}
|
||||
|
||||
// Returns the common kernels data; when it is non-empty, the first entry's estimated time
// is overridden with FORCE_PRIORITY_2.
KernelsData ConvolutionKernel_mmad_32x32sg_slm_int8::GetKernelsData(const Params& params,
                                                                    const optional_params& options) const {
    KernelsData kernels = GetCommonKernelsData(params, options);
    if (kernels.empty()) {
        return kernels;
    }

    kernels[0].estimatedTime = FORCE_PRIORITY_2;  // _3
    return kernels;
}
|
||||
} // namespace kernel_selector
|
@ -1,41 +0,0 @@
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "convolution_kernel_base.h"
|
||||
#include <vector>
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
class ConvolutionKernel_mmad_32x32sg_slm_int8 : public ConvolutionKernelBase {
|
||||
public:
|
||||
using Parent = ConvolutionKernelBase;
|
||||
ConvolutionKernel_mmad_32x32sg_slm_int8() : ConvolutionKernelBase("convolution_gpu_mmad_32x32sg_slm_int8") {}
|
||||
|
||||
virtual ~ConvolutionKernel_mmad_32x32sg_slm_int8() {}
|
||||
|
||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
|
||||
protected:
|
||||
bool Validate(const Params& p, const optional_params& o) const override;
|
||||
JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
|
||||
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
|
||||
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override {
|
||||
return WeightsLayout::is_o_yx_isv32;
|
||||
}
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -35,19 +35,8 @@
|
||||
#include "convolution_kernel_winograd_6x3_s1_fused.h"
|
||||
#include "convolution_kernel_mmad.h"
|
||||
#include "convolution_kernel_mmad_blocks.h"
|
||||
#include "convolution_kernel_mmad_1x1_gemm.h"
|
||||
#include "convolution_kernel_imad_byxf_af32_depthwise.h"
|
||||
#include "convolution_kernel_mmad_batched.h"
|
||||
#include "convolution_kernel_bfyx_depthwise_weights_lwg.h"
|
||||
#include "convolution_kernel_mmad_slm_2x14_rep4.h"
|
||||
#include "convolution_kernel_mmad_slm_7x7_rep4.h"
|
||||
#include "convolution_kernel_byxf_fs_bs_yx_bsv4_fsv32.h"
|
||||
#include "convolution_kernel_mmad_batched_block.h"
|
||||
#include "convolution_kernel_mmad_batched_block_1x1.h"
|
||||
#include "convolution_kernel_mmad_32x32sg_128x128wg_slm_int8.h"
|
||||
#include "convolution_kernel_mmad_32x32sg_224x128wg_slm_int8.h"
|
||||
#include "convolution_kernel_mmad_32x32sg_slm_int8.h"
|
||||
#include "convolution_kernel_byx8_f4__fs_bs_yx_bsv4_fsv32.h"
|
||||
#include "convolution_kernel_imad.h"
|
||||
#include "convolution_kernel_fs_byx_fsv32.h"
|
||||
#include "convolution_kernel_fs_byx_fsv32_1x1.h"
|
||||
@ -134,19 +123,6 @@ convolution_kernel_selector::convolution_kernel_selector() {
|
||||
Attach<ConvolutionKernel_mmad_blocks>();
|
||||
Attach<ConvolutionKernel_imad_byxf_af32_1x1>();
|
||||
Attach<ConvolutionKernel_imad_byxf_af32_depthiwise>();
|
||||
Attach<ConvolutionKernel_mmad_1x1_gemm>();
|
||||
|
||||
// fs_bs_yx_bsv4_fsv32 int8
|
||||
Attach<ConvolutionKernel_mmad_batched>();
|
||||
Attach<ConvolutionKernel_mmad_slm_2x14_rep4>();
|
||||
Attach<ConvolutionKernel_mmad_slm_7x7_rep4>();
|
||||
Attach<ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8>();
|
||||
Attach<ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8>();
|
||||
Attach<ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32>();
|
||||
Attach<ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32>();
|
||||
Attach<ConvolutionKernel_mmad_batched_block>();
|
||||
Attach<ConvolutionKernel_mmad_batched_block_1x1>();
|
||||
// Attach<ConvolutionKernel_mmad_32x32sg_slm_int8>();
|
||||
|
||||
// b_fs_yx_fsv4 kernels
|
||||
Attach<ConvolutionKernel_imad>();
|
||||
|
@ -1,67 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "convolution_grad_weights_kernel_1x1.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
// Builds the key describing the parameter combinations this kernel accepts.
ParamsKey ConvolutionGradWeightsKernel1x1::GetSupportedKey() const {
    ParamsKey key;

    // Data types: fp32 input, weights and output.
    key.EnableInputDataType(Datatype::F32);
    key.EnableInputWeightsType(WeightsType::F32);
    key.EnableOutputDataType(Datatype::F32);

    // Layouts: bfyx input; yxfb, bfyx or byxf output.
    key.EnableInputLayout(DataLayout::bfyx);
    key.EnableOutputLayout(DataLayout::yxfb);
    key.EnableOutputLayout(DataLayout::bfyx);
    key.EnableOutputLayout(DataLayout::byxf);

    // Remaining capabilities.
    key.EnableSubGroup();
    key.EnableTensorOffset();
    key.EnableTensorPitches();
    key.EnableBiasPerFeature();
    key.EnableNonBiasTerm();
    key.EnableMomentum();
    key.EnableBatching();
    key.EnableSplitSupport();
    key.EnableGradient();
    key.DisableTuning();

    return key;
}
|
||||
|
||||
bool ConvolutionGradWeightsKernel1x1::Validate(const Params& p, const optional_params&) const {
|
||||
const convolution_grad_weights_params& params = static_cast<const convolution_grad_weights_params&>(p);
|
||||
|
||||
if (params.filterSize.x != 1 || params.filterSize.y != 1)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Produces the dispatch configuration: 16 work-items along dimension 0 (one work-group wide),
// with dimensions 1 and 2 sized by the weights' input and output feature counts.
ConvolutionGradWeightsKernelBase::DispatchData ConvolutionGradWeightsKernel1x1::SetDefault(
    const convolution_grad_weights_params& params) const {
    const auto& weights = params.weights;

    DispatchData dispatch;

    // Global work size.
    dispatch.gws0 = 16;
    dispatch.gws1 = weights.IFM().v;
    dispatch.gws2 = weights.OFM().v;

    // Local work size.
    dispatch.lws0 = 16;
    dispatch.lws1 = 1;
    dispatch.lws2 = 1;

    dispatch.efficiency = FORCE_PRIORITY_8;
    return dispatch;
}
|
||||
} // namespace kernel_selector
|
@ -1,32 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "convolution_grad_weights_kernel_base.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
class ConvolutionGradWeightsKernel1x1 : public ConvolutionGradWeightsKernelBase {
|
||||
public:
|
||||
ConvolutionGradWeightsKernel1x1() : ConvolutionGradWeightsKernelBase("convolution_grad_weights_1x1") {}
|
||||
virtual ~ConvolutionGradWeightsKernel1x1() {}
|
||||
|
||||
DispatchData SetDefault(const convolution_grad_weights_params& params) const override;
|
||||
bool Validate(const Params& p, const optional_params& o) const override;
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -1,72 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "convolution_grad_weights_kernel_3x3.h"
|
||||
#include <algorithm>
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
// Builds the key describing the parameter combinations this kernel accepts.
ParamsKey ConvolutionGradWeightsKernel3x3::GetSupportedKey() const {
    ParamsKey key;

    // Data types: fp32 input, weights and output.
    key.EnableInputDataType(Datatype::F32);
    key.EnableInputWeightsType(WeightsType::F32);
    key.EnableOutputDataType(Datatype::F32);

    // Layouts: bfyx input; yxfb, bfyx or byxf output.
    key.EnableInputLayout(DataLayout::bfyx);
    key.EnableOutputLayout(DataLayout::yxfb);
    key.EnableOutputLayout(DataLayout::bfyx);
    key.EnableOutputLayout(DataLayout::byxf);

    // Remaining capabilities.
    key.EnableTensorOffset();
    key.EnableTensorPitches();
    key.EnableBiasPerFeature();
    key.EnableNonBiasTerm();
    key.EnableMomentum();
    key.EnableBatching();
    key.EnableSplitSupport();
    key.EnableGradient();
    key.DisableTuning();

    return key;
}
|
||||
|
||||
bool ConvolutionGradWeightsKernel3x3::Validate(const Params& p, const optional_params&) const {
|
||||
const auto& params = static_cast<const convolution_grad_weights_params&>(p);
|
||||
|
||||
if (params.stride.x != 1 || params.stride.y != 1)
|
||||
return false;
|
||||
if (params.filterSize.x != 3 || params.filterSize.y != 3)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Produces the dispatch configuration: dimension 0 covers the output features aligned to 16,
// dimension 1 covers the input features, and the local size along dimension 0 is the largest
// of 32 / 16 that divides the global size.
ConvolutionGradWeightsKernelBase::DispatchData ConvolutionGradWeightsKernel3x3::SetDefault(
    const convolution_grad_weights_params& params) const {
    const auto ifm = params.weights.IFM().v;
    const auto ofm = params.weights.OFM().v;

    DispatchData dispatch;

    dispatch.gws0 = Align(ofm, 16);
    dispatch.gws1 = ifm;
    dispatch.gws2 = 1;

    // Start from the global size clamped to [1, 32], then step down by 16 until it divides gws0.
    size_t local0 = std::max(dispatch.gws0, static_cast<size_t>(1));
    local0 = std::min(local0, static_cast<size_t>(32));
    while (dispatch.gws0 % local0 != 0) {
        local0 -= 16;
    }

    dispatch.lws0 = local0;
    dispatch.lws1 = 1;
    dispatch.lws2 = 1;

    dispatch.efficiency = FORCE_PRIORITY_8;
    return dispatch;
}
|
||||
} // namespace kernel_selector
|
@ -1,32 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "convolution_grad_weights_kernel_base.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
class ConvolutionGradWeightsKernel3x3 : public ConvolutionGradWeightsKernelBase {
|
||||
public:
|
||||
ConvolutionGradWeightsKernel3x3() : ConvolutionGradWeightsKernelBase("convolution_grad_weights_3x3") {}
|
||||
virtual ~ConvolutionGradWeightsKernel3x3() {}
|
||||
|
||||
DispatchData SetDefault(const convolution_grad_weights_params& params) const override;
|
||||
bool Validate(const Params& p, const optional_params& o) const override;
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -1,70 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "convolution_grad_weights_kernel_7x7.h"
|
||||
#include <algorithm>
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
// Builds the key describing the parameter combinations this kernel accepts.
ParamsKey ConvolutionGradWeightsKernel7x7::GetSupportedKey() const {
    ParamsKey key;

    // Data types: fp32 input, weights and output.
    key.EnableInputDataType(Datatype::F32);
    key.EnableInputWeightsType(WeightsType::F32);
    key.EnableOutputDataType(Datatype::F32);

    // Layouts: bfyx input; yxfb, bfyx or byxf output.
    key.EnableInputLayout(DataLayout::bfyx);
    key.EnableOutputLayout(DataLayout::yxfb);
    key.EnableOutputLayout(DataLayout::bfyx);
    key.EnableOutputLayout(DataLayout::byxf);

    // Remaining capabilities.
    key.EnableTensorOffset();
    key.EnableTensorPitches();
    key.EnableBiasPerFeature();
    key.EnableNonBiasTerm();
    key.EnableMomentum();
    key.EnableBatching();
    key.EnableSplitSupport();
    key.EnableGradient();
    key.DisableTuning();

    return key;
}
|
||||
|
||||
bool ConvolutionGradWeightsKernel7x7::Validate(const Params& p, const optional_params&) const {
|
||||
const auto& params = static_cast<const convolution_grad_weights_params&>(p);
|
||||
|
||||
if (params.filterSize.x != 7 || params.filterSize.y != 7)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Produces the dispatch configuration: dimension 0 is fixed at 8, dimension 1 covers the output
// features aligned to 16, dimension 2 covers the input features, and the local size along
// dimension 1 is the largest of 32 / 16 that divides the global size.
ConvolutionGradWeightsKernelBase::DispatchData ConvolutionGradWeightsKernel7x7::SetDefault(
    const convolution_grad_weights_params& params) const {
    const auto ifm = params.weights.IFM().v;
    const auto ofm = params.weights.OFM().v;

    DispatchData dispatch;

    dispatch.gws0 = 8;
    dispatch.gws1 = Align(ofm, 16);
    dispatch.gws2 = ifm;

    // Start from the global size clamped to [1, 32], then step down by 16 until it divides gws1.
    size_t local1 = std::max(dispatch.gws1, static_cast<size_t>(1));
    local1 = std::min(local1, static_cast<size_t>(32));
    while (dispatch.gws1 % local1 != 0) {
        local1 -= 16;
    }

    dispatch.lws0 = 1;
    dispatch.lws1 = local1;
    dispatch.lws2 = 1;

    dispatch.efficiency = FORCE_PRIORITY_8;
    return dispatch;
}
|
||||
} // namespace kernel_selector
|
@ -1,32 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "convolution_grad_weights_kernel_base.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
class ConvolutionGradWeightsKernel7x7 : public ConvolutionGradWeightsKernelBase {
|
||||
public:
|
||||
ConvolutionGradWeightsKernel7x7() : ConvolutionGradWeightsKernelBase("convolution_grad_weights_7x7") {}
|
||||
virtual ~ConvolutionGradWeightsKernel7x7() {}
|
||||
|
||||
DispatchData SetDefault(const convolution_grad_weights_params& params) const override;
|
||||
bool Validate(const Params& p, const optional_params& o) const override;
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -1,135 +0,0 @@
|
||||
// Copyright (c) 2018-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include "convolution_grad_weights_kernel_base.h"
|
||||
#include "kernel_selector_utils.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
namespace kernel_selector {
|
||||
// Serializes the parameters into an underscore-separated string:
//   <base_params string>_<bias part>_<filter x>_<filter y>_<stride x>_<stride y>_
//   <dilation x>_<dilation y>_<padding x>_<padding y>_<split>
// The bias part is "no_bias" when there is no bias tensor, otherwise
// "bias_" followed by the physical size of the first bias tensor.
std::string convolution_grad_weights_params::to_string() const {
    std::stringstream text;

    text << base_params::to_string() << "_";

    if (!bias.empty()) {
        text << "bias_" << bias[0].PhysicalSize();
    } else {
        text << "no_bias";
    }
    text << "_";

    text << filterSize.x << "_" << filterSize.y << "_"
         << stride.x << "_" << stride.y << "_"
         << dilation.x << "_" << dilation.y << "_"
         << padding.x << "_" << padding.y << "_"
         << split;

    return text.str();
}
|
||||
|
||||
// Extends the JIT constants of training_kernel_base with the
// convolution-gradient specific ones: STRIDE, PADDING, DILATION,
// FILTER_ARRAY_NUM (the split count), INPUT0_OFFSET_WITH_PADDING,
// DEPTHWISE_SEPARABLE_OPT and OUTPUT_GRAD_W.
JitConstants ConvolutionGradWeightsKernelBase::GetJitConstants(const convolution_grad_weights_params& cp) const {
    JitConstants constants = training_kernel_base::GetJitConstants(cp);

    const auto& pad = cp.padding;
    const auto& first_input = cp.inputs[0];

    // Distance (in elements) covered by the filter extent plus padding along
    // each spatial axis, expressed through the input pitches.
    const auto x_shift = (cp.filterSize.x - 1 + pad.x) * first_input.X().pitch;
    const auto y_shift = (cp.filterSize.y - 1 + pad.y) * first_input.Y().pitch;

    // First-element offset moved back by both shifts, never below zero.
    int64_t padded_offset = (int64_t)first_input.GetFirstElementOffset() - x_shift - y_shift;
    if (padded_offset < 0) {
        padded_offset = 0;
    }

    constants.AddConstants({
        MakeJitConstant("STRIDE", cp.stride),
        MakeJitConstant("PADDING", cp.padding),
        MakeJitConstant("DILATION", cp.dilation),
        MakeJitConstant("FILTER_ARRAY_NUM", cp.split),
        MakeJitConstant("INPUT0_OFFSET_WITH_PADDING", padded_offset),
        MakeJitConstant("DEPTHWISE_SEPARABLE_OPT", cp.depthwise_separable_opt),
        MakeJitConstant("OUTPUT_GRAD_W", cp.output_grad_w),
    });

    return constants;
}
|
||||
|
||||
// Builds the generic (fallback) dispatch configuration for
// convolution-gradient-weights kernels.
//
// Global sizes: gws0 = OFM * IFM, gws1 = weights X size, gws2 = weights Y size.
// Local sizes:  lws0 = largest value in [1, 32] that evenly divides gws0,
//               lws1 = lws2 = 1.
// The efficiency is DONT_USE_IF_HAVE_SOMETHING_ELSE, and fp16UnitUsed is set
// when the first input is of type F16.
ConvolutionGradWeightsKernelBase::DispatchData ConvolutionGradWeightsKernelBase::SetDefault(
    const convolution_grad_weights_params& params) const {
    const auto input_features = params.weights.IFM().v;
    const auto output_features = params.weights.OFM().v;

    DispatchData kd;

    kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;

    const size_t gws0 = output_features * input_features;

    // Clamp the starting local size to [1, 32]. The lower bound matters when
    // gws0 is 0: without it lws0 would be 0 and the modulo below would divide
    // by zero. The 7x7 variant of this function applies the same clamp.
    size_t lws0 = std::min(std::max(gws0, static_cast<size_t>(1)), static_cast<size_t>(32));
    while (gws0 % lws0 != 0) {
        --lws0;
    }

    kd.gws0 = gws0;
    kd.gws1 = params.weights.X().v;
    kd.gws2 = params.weights.Y().v;
    kd.lws0 = lws0;
    kd.lws1 = 1;
    kd.lws2 = 1;
    kd.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
    return kd;
}
|
||||
|
||||
// Produces the kernel description for a convolution-gradient-weights request.
//
// Returns an empty list when Validate() rejects the parameters or when the
// weights parameters cannot be updated for the oiyx layout. Otherwise returns
// a single KernelData whose first kernel is filled from the dispatch data,
// the generated JIT and the entry point, followed by the extra arguments
// appended below.
KernelsData ConvolutionGradWeightsKernelBase::GetKernelsData(const Params& params,
                                                             const optional_params& options) const {
    assert(params.GetType() == KernelType::CONVOLUTION_GRAD_WEIGHTS);

    if (!Validate(params, options)) {
        return {};
    }

    const auto& gradParams = static_cast<const convolution_grad_weights_params&>(params);

    DispatchData dispatch = SetDefault(gradParams);
    KernelData kernelData = KernelData::Default<convolution_grad_weights_params>(params);
    auto& paramsCopy = *static_cast<convolution_grad_weights_params*>(kernelData.params.get());

    // The copy owned by kernelData is the one whose weights get adjusted.
    if (!UpdateWeightsParams(paramsCopy, options, WeightsLayout::oiyx, kernelData.weightsReorderParams)) {
        return {};
    }

    auto jitConstants = GetJitConstants(gradParams);
    auto entryPoint = GetEntryPoint(kernelName, gradParams.layerID, options);
    auto jitCode = CreateJit(kernelName, jitConstants, entryPoint);

    auto& kernel = kernelData.kernels[0];
    FillCLKernelData(kernel,
                     dispatch,
                     params.engineInfo,
                     kernelName,
                     jitCode,
                     entryPoint,
                     DEFAULT,
                     true,
                     !gradParams.bias.empty());

    auto& arguments = kernel.arguments;

    // Momentum needs the previous gradients (the bias one only with a bias).
    if (paramsCopy.use_momentum) {
        arguments.push_back({ArgumentDescriptor::Types::PREV_WEIGHTS_GRADIENT, 0});
        if (!paramsCopy.bias.empty()) {
            arguments.push_back({ArgumentDescriptor::Types::PREV_BIAS_GRADIENT, 0});
        }
    }

    arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});
    arguments.push_back({ArgumentDescriptor::Types::SPLIT, 0});
    arguments.push_back({ArgumentDescriptor::Types::LEARNING_RATE, 0});

    kernelData.estimatedTime = dispatch.efficiency;

    return {kernelData};
}
|
||||
} // namespace kernel_selector
|
@ -1,79 +0,0 @@
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "training_kernel_base.h"
|
||||
#include "kernel_selector_params.h"
|
||||
#include <string>
|
||||
|
||||
namespace kernel_selector {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// convolution_grad_weights_params
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
struct convolution_grad_weights_params : public training_params {
|
||||
convolution_grad_weights_params() : training_params(KernelType::CONVOLUTION_GRAD_WEIGHTS) {}
|
||||
|
||||
uSize filterSize;
|
||||
uSize stride;
|
||||
uSize dilation;
|
||||
uSize padding;
|
||||
uint32_t split = 1;
|
||||
bool depthwise_separable_opt = false;
|
||||
bool output_grad_w = false;
|
||||
|
||||
std::string to_string() const override;
|
||||
|
||||
ParamsKey GetParamsKey() const override {
|
||||
ParamsKey k = training_params::GetParamsKey();
|
||||
|
||||
if (split > 1) {
|
||||
k.EnableSplitSupport();
|
||||
}
|
||||
|
||||
if (dilation.x != 1 || dilation.y != 1) {
|
||||
k.EnableDilation();
|
||||
}
|
||||
|
||||
if (depthwise_separable_opt) {
|
||||
k.EnableDepthwiseSeparableOpt();
|
||||
}
|
||||
return k;
|
||||
}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// convolution_grad_weights_optional_params
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
struct convolution_grad_weights_optional_params : training_optional_params {
|
||||
convolution_grad_weights_optional_params() : training_optional_params(KernelType::CONVOLUTION_GRAD_WEIGHTS) {}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// ConvolutionGradWeightsKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class ConvolutionGradWeightsKernelBase : public training_kernel_base {
|
||||
public:
|
||||
using training_kernel_base::training_kernel_base;
|
||||
virtual ~ConvolutionGradWeightsKernelBase() {}
|
||||
|
||||
using DispatchData = CommonDispatchData;
|
||||
|
||||
protected:
|
||||
virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const;
|
||||
virtual JitConstants GetJitConstants(const convolution_grad_weights_params& params) const;
|
||||
virtual DispatchData SetDefault(const convolution_grad_weights_params& params) const;
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -1,45 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "convolution_grad_weights_kernel_ref.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
// Describes what the reference gradient-weights kernel accepts:
// F16/F32 input and output data, F32 weights, yxfb/bfyx/byxf layouts on both
// sides, plus the feature flags enabled below. Tuning is disabled.
ParamsKey ConvolutionGradWeightsKernelRef::GetSupportedKey() const {
    ParamsKey key;

    // Data types: the same set is accepted for inputs and outputs.
    const Datatype data_types[] = {Datatype::F16, Datatype::F32};
    for (const auto type : data_types) {
        key.EnableInputDataType(type);
        key.EnableOutputDataType(type);
    }
    key.EnableInputWeightsType(WeightsType::F32);

    // Layouts: the same set is accepted for inputs and outputs.
    const DataLayout layouts[] = {DataLayout::yxfb, DataLayout::bfyx, DataLayout::byxf};
    for (const auto layout : layouts) {
        key.EnableInputLayout(layout);
        key.EnableOutputLayout(layout);
    }

    // Feature flags.
    key.EnableTensorOffset();
    key.EnableTensorPitches();
    key.EnableBiasPerFeature();
    key.EnableNonBiasTerm();
    key.EnableMomentum();
    key.EnableBatching();
    key.EnableSplitSupport();
    key.EnableGradient();
    key.DisableTuning();

    return key;
}
|
||||
} // namespace kernel_selector
|
@ -1,29 +0,0 @@
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "convolution_grad_weights_kernel_base.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
class ConvolutionGradWeightsKernelRef : public ConvolutionGradWeightsKernelBase {
|
||||
public:
|
||||
ConvolutionGradWeightsKernelRef() : ConvolutionGradWeightsKernelBase("convolution_grad_weights_ref") {}
|
||||
virtual ~ConvolutionGradWeightsKernelRef() {}
|
||||
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -1,36 +0,0 @@
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include "convolution_grad_weights_kernel_selector.h"
|
||||
#include "convolution_grad_weights_kernel_ref.h"
|
||||
#include "convolution_grad_weights_kernel_1x1.h"
|
||||
#include "convolution_grad_weights_kernel_yxfb.h"
|
||||
#include "convolution_grad_weights_kernel_3x3.h"
|
||||
#include "convolution_grad_weights_kernel_7x7.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
// Registers every available convolution-gradient-weights implementation with
// this selector. The registration order is kept exactly as it was.
convolution_grad_weights_kernel_selector::convolution_grad_weights_kernel_selector() {
    // Generic reference implementation.
    Attach<ConvolutionGradWeightsKernelRef>();

    // Specialized implementations.
    Attach<ConvolutionGradWeightsKernel1x1>();
    Attach<ConvolutionGradWeightsKernel_yxfb>();
    Attach<ConvolutionGradWeightsKernel3x3>();
    Attach<ConvolutionGradWeightsKernel7x7>();
}
|
||||
|
||||
// Delegates to GetNaiveBestKernel for the CONVOLUTION_GRAD_WEIGHTS kernel type
// and returns whatever it selects.
KernelsData convolution_grad_weights_kernel_selector::GetBestKernels(const Params& params,
                                                                     const optional_params& options) const {
    const auto kernel_type = KernelType::CONVOLUTION_GRAD_WEIGHTS;
    return GetNaiveBestKernel(params, options, kernel_type);
}
|
||||
} // namespace kernel_selector
|
@ -1,34 +0,0 @@
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "kernel_selector.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
class convolution_grad_weights_kernel_selector : public kernel_selector_base {
|
||||
public:
|
||||
static convolution_grad_weights_kernel_selector& Instance() {
|
||||
static convolution_grad_weights_kernel_selector instance_;
|
||||
return instance_;
|
||||
}
|
||||
|
||||
convolution_grad_weights_kernel_selector();
|
||||
|
||||
virtual ~convolution_grad_weights_kernel_selector() {}
|
||||
|
||||
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -1,74 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "convolution_grad_weights_kernel_yxfb.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
// Describes what the yxfb gradient-weights kernel accepts: F32 data and
// weights, yxfb input layout, yxfb/bfyx/byxf output layouts, sub-groups and
// the feature flags enabled below. Tuning is disabled.
ParamsKey ConvolutionGradWeightsKernel_yxfb::GetSupportedKey() const {
    ParamsKey key;

    // Data types.
    key.EnableInputDataType(Datatype::F32);
    key.EnableInputWeightsType(WeightsType::F32);
    key.EnableOutputDataType(Datatype::F32);

    // Layouts: a single input layout, several output layouts.
    key.EnableInputLayout(DataLayout::yxfb);
    const DataLayout output_layouts[] = {DataLayout::yxfb, DataLayout::bfyx, DataLayout::byxf};
    for (const auto layout : output_layouts) {
        key.EnableOutputLayout(layout);
    }

    // Feature flags.
    key.EnableSubGroup();
    key.EnableTensorOffset();
    key.EnableTensorPitches();
    key.EnableBiasPerFeature();
    key.EnableNonBiasTerm();
    key.EnableMomentum();
    key.EnableBatching();
    key.EnableSplitSupport();
    key.EnableGradient();
    key.DisableTuning();

    return key;
}
|
||||
|
||||
bool ConvolutionGradWeightsKernel_yxfb::Validate(const Params& p, const optional_params&) const {
|
||||
const convolution_grad_weights_params& params = static_cast<const convolution_grad_weights_params&>(p);
|
||||
auto batch = params.inputs[0].Batch().v;
|
||||
|
||||
if (batch % 16 != 0)
|
||||
return false;
|
||||
if (params.stride.x != 1 || params.stride.y != 1)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Builds the dispatch configuration for the yxfb gradient-weights kernel.
//
// Global sizes: gws0 = 16, gws1 = IFM * OFM, gws2 = filter X * filter Y.
// Local sizes:  16 x 1 x 1.
// The efficiency is FORCE_PRIORITY_7.
ConvolutionGradWeightsKernelBase::DispatchData ConvolutionGradWeightsKernel_yxfb::SetDefault(
    const convolution_grad_weights_params& params) const {
    const auto& weights = params.weights;

    const auto feature_pairs = weights.IFM().v * weights.OFM().v;
    const auto filter_area = weights.X().v * weights.Y().v;

    DispatchData dispatch;

    dispatch.gws0 = 16;
    dispatch.gws1 = feature_pairs;
    dispatch.gws2 = filter_area;

    dispatch.lws0 = 16;
    dispatch.lws1 = 1;
    dispatch.lws2 = 1;

    dispatch.efficiency = FORCE_PRIORITY_7;

    return dispatch;
}
|
||||
} // namespace kernel_selector
|
@ -1,32 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "convolution_grad_weights_kernel_base.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
class ConvolutionGradWeightsKernel_yxfb : public ConvolutionGradWeightsKernelBase {
|
||||
public:
|
||||
ConvolutionGradWeightsKernel_yxfb() : ConvolutionGradWeightsKernelBase("convolution_grad_weights_yxfb") {}
|
||||
virtual ~ConvolutionGradWeightsKernel_yxfb() {}
|
||||
|
||||
DispatchData SetDefault(const convolution_grad_weights_params& params) const override;
|
||||
bool Validate(const Params& p, const optional_params& o) const override;
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -35,7 +35,6 @@ ParamsKey DeconvolutionKernel_bfyx_opt::GetSupportedKey() const {
|
||||
k.EnableBatching();
|
||||
k.EnableSplitSupport();
|
||||
k.EnableDepthwiseSeparableOpt();
|
||||
k.EnableGradient();
|
||||
k.EnableGroupedConvolution();
|
||||
return k;
|
||||
}
|
||||
|
@ -55,7 +55,6 @@ ParamsKey DeconvolutionKernelRef::GetSupportedKey() const {
|
||||
k.EnableBatching();
|
||||
k.EnableSplitSupport();
|
||||
k.EnableDepthwiseSeparableOpt();
|
||||
k.EnableGradient();
|
||||
k.EnableGroupedConvolution();
|
||||
k.EnableDifferentTypes();
|
||||
k.EnableDifferentInputWeightsTypes();
|
||||
|
@ -222,9 +222,7 @@ KernelsData EltwiseKernel_b_fs_yx_fsv16::GetKernelsData(const Params& params, co
|
||||
kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT);
|
||||
kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(),
|
||||
false,
|
||||
false,
|
||||
newParams.int8_quantization,
|
||||
newParams.output_calibration);
|
||||
false);
|
||||
|
||||
kd.estimatedTime = runInfo.efficiency;
|
||||
|
||||
|
@ -1,288 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2019-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "eltwise_kernel_b_fs_yx_fsv4.h"
|
||||
#include "kernel_selector_utils.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
// Describes what the b_fs_yx_fsv4 eltwise kernel accepts: INT8/UINT8 data on
// both sides, the b_fs_yx_fsv4 layout on both sides, and the feature flags
// enabled below.
ParamsKey EltwiseKernel_b_fs_yx_fsv4::GetSupportedKey() const {
    ParamsKey key;

    // Data types: the same set is accepted for inputs and outputs.
    const Datatype byte_types[] = {Datatype::INT8, Datatype::UINT8};
    for (const auto type : byte_types) {
        key.EnableInputDataType(type);
        key.EnableOutputDataType(type);
    }

    // Layout.
    key.EnableInputLayout(DataLayout::b_fs_yx_fsv4);
    key.EnableOutputLayout(DataLayout::b_fs_yx_fsv4);

    // Feature flags.
    key.EnableTensorOffset();
    key.EnableTensorPitches();
    key.EnableBatching();
    key.EnableInt8Quantization();
    key.EnableEltwiseStride();

    return key;
}
|
||||
|
||||
// Builds the dispatch configuration for the b_fs_yx_fsv4 eltwise kernel.
//
// The data is treated as one linear range, so only dimension 0 is used:
// gws0 is the total number of output elements (X * Y * Batch * Feature)
// divided by 4 * 4, with lws0 = 8; all other sizes are 1.
// The efficiency is FORCE_PRIORITY_1.
EltwiseKernelBase::DispatchData EltwiseKernel_b_fs_yx_fsv4::SetDefault(const eltwise_params& params) const {
    const auto& out = params.output;

    // Each work item covers 4 * 4 elements (original note: 4 int8 bytes per
    // block_read4 read).
    const auto total_elements = out.X().v * out.Y().v * out.Batch().v * out.Feature().v;

    DispatchData dispatch;

    // Global work sizes: everything is folded into the first dimension.
    dispatch.gws0 = total_elements / (4 * 4);
    dispatch.gws1 = 1;
    dispatch.gws2 = 1;

    // Local work sizes.
    dispatch.lws0 = 8;
    dispatch.lws1 = 1;
    dispatch.lws2 = 1;

    dispatch.efficiency = FORCE_PRIORITY_1;
    return dispatch;
}
|
||||
|
||||
bool EltwiseKernel_b_fs_yx_fsv4::Validate(const Params& params, const optional_params& options) const {
|
||||
// Requirents to use 'eltwise_b_fs_yx_fsv4' kernel are below:
|
||||
// 1. No stride
|
||||
// 2. All dimensions for all inputs are the same
|
||||
// 3. No padding
|
||||
// So, it can be linearized
|
||||
|
||||
if (!Parent::Validate(params, options)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
KernelData kd = KernelData::Default<eltwise_params>(params);
|
||||
eltwise_params& newParams = *static_cast<eltwise_params*>(kd.params.get());
|
||||
|
||||
// 1. No stride
|
||||
if (!newParams.stride.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < newParams.inputs.size() - 1; i++) {
|
||||
// 2. All dimensions for all inputs are the same
|
||||
if (!(newParams.inputs[i] == newParams.inputs[i + 1])) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
const auto& in = newParams.inputs[0];
|
||||
for (size_t i = 0; i < in.Dimentions(); i++) {
|
||||
// 3. No padding
|
||||
if ((in.GetDims()[i].pad.before != 0) || (in.GetDims()[i].pad.after != 0)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Generates the JIT constants for the b_fs_yx_fsv4 eltwise kernel.
//
// Starting from the base-params constants, this adds:
//   - ELTW_UNSIGNED when the first input is UINT8;
//   - ELTWISE_LAYOUT_BASED / QUANTIZATION_TERM, and with int8 quantization the
//     O_QF factor (plus CALIBRATION_TERM when output calibration is used);
//   - INPUTS_DECLS: the kernel argument list, one "__global" pointer per input,
//     "const"-qualified unless the input appears in updateInputIds;
//   - ELTWISE_NO_PITCH_SAME_DIMS;
//   - one INPUT_<op>_<idx> constant per operation input and one
//     OPERATION<op> constant per operation (all computed as int16 vectors);
//   - DO_ELTWISE: the sequence of operations, input write-backs and the final
//     "res" assignment;
//   - tensor-friendly work-group constants and INPUT_STRIDED when applicable.
//
// NOTE(review): ADD reads coefficient slots 0 and 1 and the final "res" line
// uses operations.size() - 1, so ADD is assumed to have two inputs and the
// operation list to be non-empty - confirm with the callers.
JitConstants EltwiseKernel_b_fs_yx_fsv4::GetJitConstants(const eltwise_params& params) const {
    JitConstants jit = MakeBaseParamsJitConstants(params);

    // Special handler for unsigned types
    if (params.inputs[0].GetDType() == Datatype::UINT8) {
        jit.AddConstant(MakeJitConstant("ELTW_UNSIGNED", 1));
    }

    jit.AddConstant(MakeJitConstant("ELTWISE_LAYOUT_BASED", params.layoutBased));
    jit.AddConstant(MakeJitConstant("QUANTIZATION_TERM", params.int8_quantization));

    // Output quantization factor: per-calibration value or the single factor.
    if (params.int8_quantization) {
        if (!params.output_calibration) {
            jit.AddConstant(MakeJitConstant("O_QF", params.output_quantization_factor));
        } else {
            jit.AddConstant(MakeJitConstant("CALIBRATION_TERM", params.output_calibration));
            jit.AddConstant(MakeJitConstant("O_QF", params.output_calibration_factors[0]));
        }
    }

    auto& updateInputs = params.updateInputIds;

    // Kernel argument declarations.
    std::string inputs_decls;
    for (size_t input_id = 0; input_id < params.inputs.size(); input_id++) {
        // const should be added only to inputs which will not be updated
        bool is_updated = false;
        for (size_t u = 0; u < updateInputs.size(); u++) {
            if (updateInputs[u].inputId == input_id) {
                is_updated = true;
                break;
            }
        }
        const std::string qualifier = is_updated ? "" : "const";

        inputs_decls += qualifier + " __global " + toCLType(params.inputs[input_id].GetDType()) + "* input" +
                        std::to_string(input_id) + ", ";
    }

    jit.AddConstant(MakeJitConstant("INPUTS_DECLS", inputs_decls));
    jit.AddConstant(MakeJitConstant("ELTWISE_NO_PITCH_SAME_DIMS", CheckInputsOutputNoPitchSameDims(params)));

    auto& operations = params.operations;
    auto& coefficients = params.coefficients;

    std::string do_eltwise;

    for (size_t op_num = 0; op_num < operations.size(); op_num++) {
        const std::string op_num_str = std::to_string(op_num);
        const auto& ew = operations[op_num];

        // One INPUT_<op>_<idx> macro per operand, describing where it is read from.
        for (size_t input_idx = 0; input_idx < ew.inputs.size(); input_idx++) {
            const auto& input = ew.inputs[input_idx];
            const std::string name = "INPUT_" + op_num_str + "_" + std::to_string(input_idx);

            switch (input.mode) {
                case EltwiseInputMode::SCALAR:
                    jit.AddConstant(MakeJitConstant(name, input.scalar));
                    break;
                case EltwiseInputMode::INPUT_BUFFER:
                    jit.AddConstant(MakeJitConstant(name,
                                                    "GET_INPUT(input" + std::to_string(input.index) + ", INPUT" +
                                                        std::to_string(input.index) + ")"));
                    break;
                case EltwiseInputMode::OUTPUT_BUFFER:
                    jit.AddConstant(MakeJitConstant(name, "output[GET_INDEX(OUTPUT, )]"));
                    break;
                case EltwiseInputMode::UNORDERED_ACCESS_INPUT_BUFFER:
                    jit.AddConstant(MakeJitConstant(
                        name,
                        "input" + std::to_string(input.index) + "[(size_t)tmp" + std::to_string(input.tmpIndex) + "]"));
                    break;
                case EltwiseInputMode::INTERMEDIATE_RESULTS_INDEX:
                    jit.AddConstant(MakeJitConstant(name, "tmp" + std::to_string(input.tmpIndex)));
                    break;
                default:
                    break;
            }
        }

        // Every operation is evaluated on int16 vectors.
        const std::string cast_type = "(int16)";
        std::string op = "const int16 tmp" + op_num_str + " = ";
        std::string lhs = cast_type + "INPUT_" + op_num_str + "_0";
        std::string rhs = cast_type + "INPUT_" + op_num_str + "_1";

        // ADD may scale buffer inputs by a per-input coefficient other than 1.
        if (ew.mode == EltwiseMode::ADD) {
            std::vector<std::string> coeff_strings(ew.inputs.size(), "");
            for (size_t input_idx = 0; input_idx < ew.inputs.size(); input_idx++) {
                const auto& input = ew.inputs[input_idx];
                if (input.mode != EltwiseInputMode::INPUT_BUFFER || !(input.index < coefficients.size())) {
                    continue;
                }
                const float c = coefficients[input.index];
                if (c != 1.0f) {
                    coeff_strings[input_idx] = cast_type + "(" + std::to_string(c) + ")*";
                }
            }

            lhs = coeff_strings[0] + lhs;
            rhs = coeff_strings[1] + rhs;
        }

        switch (ew.mode) {
            case EltwiseMode::ADD:
                op += lhs + " + " + rhs;
                break;
            case EltwiseMode::SUB:
                op += lhs + " - " + rhs;
                break;
            case EltwiseMode::MUL:
                op += lhs + " * " + rhs;
                break;
            case EltwiseMode::DIV:
                op += lhs + " / " + rhs;
                break;
            case EltwiseMode::MODULU:
            case EltwiseMode::MIN:
            case EltwiseMode::MAX: {
                std::string fn_name;
                if (ew.mode == EltwiseMode::MODULU) {
                    fn_name = "mod";
                } else if (ew.mode == EltwiseMode::MIN) {
                    fn_name = "min";
                } else {
                    fn_name = "max";
                }

                const auto input_0_type = params.inputs[0].GetDType();
                const auto input_1_type = params.inputs[1].GetDType();
                const bool input_0_is_int = input_0_type == kernel_selector::Datatype::INT8 ||
                                            input_0_type == kernel_selector::Datatype::UINT8;
                const bool input_1_is_int = input_1_type == kernel_selector::Datatype::INT8 ||
                                            input_1_type == kernel_selector::Datatype::UINT8;

                if (input_0_is_int && input_1_is_int) {
                    // Both integer: plain operator for modulo, integer builtin otherwise.
                    if (ew.mode == EltwiseMode::MODULU) {
                        op += lhs + " % " + rhs;
                    } else {
                        op += cast_type + fn_name + "(" + lhs + ", " + rhs + ")";
                    }
                } else if (input_0_is_int) {
                    // Only the first operand is integer: convert it to float.
                    op += cast_type + "f" + fn_name + "(convert_float(" + lhs + "), " + rhs + ")";
                } else if (input_1_is_int) {
                    // Only the second operand is integer: convert it to float.
                    op += cast_type + "f" + fn_name + "(" + lhs + ", convert_float(" + rhs + "))";
                } else {
                    // Neither operand is integer.
                    op += cast_type + "f" + fn_name + "(" + lhs + ", " + rhs + ")";
                }
            } break;
            case EltwiseMode::POW:
                op += cast_type + "pow(" + lhs + ", " + rhs + ")";
                break;
            case EltwiseMode::SQRT:
                op += cast_type + "sqrt(" + lhs + ")";
                break;
            case EltwiseMode::RSQRT:
                op += cast_type + "1/sqrt(" + lhs + ")";
                break;
            case EltwiseMode::ASSIGN:
                op += lhs;
                break;
            default:
                break;
        }

        const std::string opname = "OPERATION" + op_num_str;
        jit.AddConstant(MakeJitConstant(opname, op));
        do_eltwise += "\\\n\t" + opname + ";";
    }

    // Write intermediate results back into the inputs that are marked as updated.
    for (size_t u = 0; u < updateInputs.size(); u++) {
        const std::string target_id = std::to_string(updateInputs[u].inputId);
        do_eltwise += "\\\n\tinput" + target_id + "[GET_INDEX(INPUT, " + target_id + ")] = tmp" +
                      std::to_string(updateInputs[u].tmpId) + ";";
    }

    // The result of the last operation is the kernel result.
    do_eltwise += "\\\n\tres = tmp" + std::to_string(operations.size() - 1) + ";";

    jit.AddConstant(MakeJitConstant("DO_ELTWISE", do_eltwise));

    if (params.layoutBased || params.int8_quantization) {
        jit.Merge(GetTensorFriendlyWorkGroupsJit(params.inputs[0]));
    }

    if (!params.stride.empty()) {
        jit.AddConstant(MakeJitConstant("INPUT_STRIDED", 1));
    }

    return jit;
}
|
||||
|
||||
// Builds the kernel description through the common eltwise path of the base
// class; nothing kernel-specific is added here.
KernelsData EltwiseKernel_b_fs_yx_fsv4::GetKernelsData(const Params& params, const optional_params& options) const {
    KernelsData kernels = GetCommonKernelsData(params, options);
    return kernels;
}
|
||||
} // namespace kernel_selector
|
@ -1,36 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2019 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "eltwise_kernel_base.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
class EltwiseKernel_b_fs_yx_fsv4 : public EltwiseKernelBase {
|
||||
public:
|
||||
using Parent = EltwiseKernelBase;
|
||||
EltwiseKernel_b_fs_yx_fsv4() : EltwiseKernelBase("eltwise_b_fs_yx_fsv4") {}
|
||||
virtual ~EltwiseKernel_b_fs_yx_fsv4() {}
|
||||
|
||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
|
||||
protected:
|
||||
bool Validate(const Params& params, const optional_params& options) const override;
|
||||
JitConstants GetJitConstants(const eltwise_params& params) const override;
|
||||
DispatchData SetDefault(const eltwise_params& params) const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -51,17 +51,6 @@ static uint32_t GetNumberOfInputs(EltwiseMode m) {
|
||||
|
||||
ParamsKey eltwise_params::GetParamsKey() const {
|
||||
ParamsKey k = base_params::GetParamsKey();
|
||||
if (int8_quantization) {
|
||||
k.EnableInt8Quantization();
|
||||
}
|
||||
|
||||
if (output_calibration) {
|
||||
k.EnableOutputCalibration();
|
||||
}
|
||||
|
||||
if (inputs_calibration) {
|
||||
k.EnableEltwiseInputsCalibration();
|
||||
}
|
||||
|
||||
if (!stride.empty()) {
|
||||
k.EnableEltwiseStride();
|
||||
@ -617,9 +606,7 @@ KernelsData EltwiseKernelBase::GetCommonKernelsData(const Params& params, const
|
||||
kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT);
|
||||
kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(),
|
||||
false,
|
||||
false,
|
||||
newParams.int8_quantization,
|
||||
newParams.output_calibration);
|
||||
false);
|
||||
|
||||
kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
|
||||
|
||||
|
@ -84,14 +84,8 @@ struct eltwise_params : public base_params {
|
||||
|
||||
bool layoutBased = false;
|
||||
bool int8_quantization = false;
|
||||
bool output_calibration = false;
|
||||
float output_quantization_factor = 1.0f;
|
||||
bool inputs_calibration = false;
|
||||
bool broadcast = false;
|
||||
|
||||
MultiDataTensor output_calibration_factors;
|
||||
MultiDataTensor inputs_calibration_factors;
|
||||
std::vector<float> input_quantization_factors;
|
||||
virtual ParamsKey GetParamsKey() const;
|
||||
};
|
||||
|
||||
|
@ -1,301 +0,0 @@
|
||||
// Copyright (c) 2018-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include "eltwise_kernel_fs_bs_yx_bsv4_fsv32.h"
|
||||
#include "kernel_selector_utils.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
// Describes what this kernel accepts: INT8 input and output, the
// fs_bs_yx_bsv4_fsv32 layout on both sides, plus the tensor offset, tensor
// pitches, batching, int8 quantization and eltwise stride capabilities.
ParamsKey EltwiseKernel_fs_bs_yx_bsv4_fsv32::GetSupportedKey() const {
    ParamsKey supported;

    // Data types.
    supported.EnableInputDataType(Datatype::INT8);
    supported.EnableOutputDataType(Datatype::INT8);

    // Layouts.
    supported.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
    supported.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);

    // Additional capabilities.
    supported.EnableTensorOffset();
    supported.EnableTensorPitches();
    supported.EnableBatching();
    supported.EnableInt8Quantization();
    supported.EnableEltwiseStride();

    return supported;
}
|
||||
|
||||
// Fills in the dispatch configuration from the output tensor dimensions:
// one work item per X and per Y position, and one per group of 4 batches
// times 4 features in the third dimension. The local size is fixed at 1x1x8.
EltwiseKernelBase::DispatchData EltwiseKernel_fs_bs_yx_bsv4_fsv32::SetDefault(const eltwise_params& params) const {
    const auto& out = params.output;

    // we process 4 batches and 4 features per workitem
    const auto batch_groups = out.Batch().v / 4;
    const auto feature_groups = out.Feature().v / 4;

    DispatchData dispatch;
    dispatch.gws0 = out.X().v;
    dispatch.gws1 = out.Y().v;
    dispatch.gws2 = batch_groups * feature_groups;

    dispatch.lws0 = 1;
    dispatch.lws1 = 1;
    dispatch.lws2 = 8;

    dispatch.efficiency = FORCE_PRIORITY_3;
    return dispatch;
}
|
||||
|
||||
// Assembles the JIT constants for this kernel, in this order:
//   1. the base-params constants,
//   2. input pitch/offset constants computed from the first input,
//   3. layout/quantization switches and the output quantization factor,
//   4. INPUTS_DECLS (kernel argument declarations) and per-input strides,
//   5. one OPERATION<n> constant per eltwise operation, chained together
//      in DO_ELTWISE, which ends by assigning the last temporary to `res`.
JitConstants EltwiseKernel_fs_bs_yx_bsv4_fsv32::GetJitConstants(const eltwise_params& params) const {
    JitConstants jit = MakeBaseParamsJitConstants(params);

    // All pitches are computed from the first input only.
    const auto& first_input = params.inputs[0];
    const size_t x_pitch = 32 * 4;
    const size_t y_pitch = 32 * 4 * first_input.X().LogicalDimPadded();
    const size_t b_block_pitch = y_pitch * first_input.Y().LogicalDimPadded();
    const size_t f_block_pitch = b_block_pitch * ((first_input.Batch().v + 3) / 4);
    const size_t start_offset = x_pitch * first_input.X().pad.before + y_pitch * first_input.Y().pad.before;

    jit.AddConstant(MakeJitConstant("IN_X_PITCH", x_pitch));
    jit.AddConstant(MakeJitConstant("IN_Y_PITCH", y_pitch));
    jit.AddConstant(MakeJitConstant("IN_B_BLOCK_PITCH", b_block_pitch));
    jit.AddConstant(MakeJitConstant("IN_F_BLOCK_PITCH", f_block_pitch));
    jit.AddConstant(MakeJitConstant("IN_OFFSET", start_offset));

    ///////////////
    jit.AddConstants({
        MakeJitConstant("ELTWISE_LAYOUT_BASED", params.layoutBased),
        MakeJitConstant("QUANTIZATION_TERM", params.int8_quantization),
    });

    // O_QF comes from the calibration factors when output calibration is on,
    // otherwise from the scalar output quantization factor.
    if (params.int8_quantization && params.output_calibration) {
        jit.AddConstant(MakeJitConstant("CALIBRATION_TERM", params.output_calibration));
        jit.AddConstant(MakeJitConstant("O_QF", params.output_calibration_factors[0]));
    } else if (params.int8_quantization) {
        jit.AddConstants({MakeJitConstant("O_QF", params.output_quantization_factor)});
    }

    std::string inputs_decls;
    auto& updateInputs = params.updateInputIds;

    for (size_t in_idx = 0; in_idx < params.inputs.size(); in_idx++) {
        const std::string in_idx_str = std::to_string(in_idx);

        // const should be added only to inputs which will not be updated
        bool is_updated = false;
        for (const auto& upd : updateInputs) {
            if (upd.inputId == in_idx) {
                is_updated = true;
                break;
            }
        }
        const std::string qualifier = is_updated ? "" : "const";

        inputs_decls +=
            qualifier + " __global " + toCLType(params.inputs[in_idx].GetDType()) + "* input" + in_idx_str + ", ";

        if (!params.stride.empty()) {
            jit.AddConstant(MakeJitConstant("INPUT" + in_idx_str + "_STRIDE_X", params.stride[in_idx].x));
            jit.AddConstant(MakeJitConstant("INPUT" + in_idx_str + "_STRIDE_Y", params.stride[in_idx].y));
        }
    }

    jit.AddConstant(MakeJitConstant("INPUTS_DECLS", inputs_decls));
    jit.AddConstant(MakeJitConstant("ELTWISE_NO_PITCH_SAME_DIMS", CheckInputsOutputNoPitchSameDims(params)));

    std::string do_eltwise;

    auto& operations = params.operations;
    auto& coefficients = params.coefficients;

    // True for the integer data types that take the integer code path below.
    const auto is_integer = [](Datatype type) {
        return type == Datatype::INT8 || type == Datatype::INT32 || type == Datatype::INT64;
    };

    for (size_t op_num = 0; op_num < operations.size(); op_num++) {
        const std::string op_num_str = std::to_string(op_num);
        const auto& ew = operations[op_num];

        // Define INPUT_<op>_<k> for every operand of this operation.
        for (size_t operand_idx = 0; operand_idx < ew.inputs.size(); operand_idx++) {
            const auto& operand = ew.inputs[operand_idx];
            const std::string name = "INPUT_" + op_num_str + "_" + std::to_string(operand_idx);

            if (operand.mode == EltwiseInputMode::SCALAR) {
                jit.AddConstant(MakeJitConstant(name, operand.scalar));
            } else if (operand.mode == EltwiseInputMode::INPUT_BUFFER) {
                const std::string buffer_idx = std::to_string(operand.index);
                jit.AddConstant(MakeJitConstant(name, "GET_INPUT(input" + buffer_idx + ", INPUT" + buffer_idx + ")"));
            } else if (operand.mode == EltwiseInputMode::OUTPUT_BUFFER) {
                jit.AddConstant(MakeJitConstant(name, "output[GET_INDEX(OUTPUT, )]"));
            } else if (operand.mode == EltwiseInputMode::UNORDERED_ACCESS_INPUT_BUFFER) {
                jit.AddConstant(MakeJitConstant(
                    name,
                    "input" + std::to_string(operand.index) + "[(size_t)tmp" + std::to_string(operand.tmpIndex) + "]"));
            } else if (operand.mode == EltwiseInputMode::INTERMEDIATE_RESULTS_INDEX) {
                jit.AddConstant(MakeJitConstant(name, "tmp" + std::to_string(operand.tmpIndex)));
            }
        }

        // The temporary is int16 when quantized, UNIT_TYPE otherwise.
        const std::string tmp_type = params.int8_quantization ? "int16" : "UNIT_TYPE";
        const std::string cast_type = "(" + tmp_type + ")";
        std::string op = "const " + tmp_type + " tmp" + op_num_str + " = ";

        std::string input0_str = cast_type + "INPUT_" + op_num_str + "_0";
        std::string input1_str = cast_type + "INPUT_" + op_num_str + "_1";

        if (ew.mode == EltwiseMode::ADD) {
            // Prefix buffer operands with their coefficient when it is not 1.
            std::vector<std::string> coeff_strings(ew.inputs.size(), "");
            for (size_t operand_idx = 0; operand_idx < ew.inputs.size(); operand_idx++) {
                const auto& operand = ew.inputs[operand_idx];
                if (operand.mode == EltwiseInputMode::INPUT_BUFFER && operand.index < coefficients.size()) {
                    const float c = coefficients[operand.index];
                    if (c != 1.0f)
                        coeff_strings[operand_idx] = cast_type + "(" + std::to_string(c) + ")*";
                }
            }

            input0_str = coeff_strings[0] + input0_str;
            input1_str = coeff_strings[1] + input1_str;
        }

        // "<lhs><symbol><rhs>" without any cast or parentheses.
        const auto infix = [&](const char* symbol) { return input0_str + symbol + input1_str; };
        // "<cast>(<lhs><symbol><rhs>)".
        const auto cast_infix = [&](const char* symbol) {
            return cast_type + "(" + input0_str + symbol + input1_str + ")";
        };

        switch (ew.mode) {
            case EltwiseMode::ADD:
                op += infix(" + ");
                break;
            case EltwiseMode::SUB:
                op += infix(" - ");
                break;
            case EltwiseMode::MUL:
                op += infix(" * ");
                break;
            case EltwiseMode::DIV:
                op += infix(" / ");
                break;
            case EltwiseMode::MODULU:
            case EltwiseMode::MIN:
            case EltwiseMode::MAX: {
                const std::string fn =
                    (ew.mode == EltwiseMode::MODULU ? "mod" : (ew.mode == EltwiseMode::MIN ? "min" : "max"));
                const bool lhs_is_int = is_integer(params.inputs[0].GetDType());
                const bool rhs_is_int = is_integer(params.inputs[1].GetDType());

                if (lhs_is_int && rhs_is_int) {
                    // Both integer: '%' for modulo, the plain function otherwise.
                    if (ew.mode == EltwiseMode::MODULU)
                        op += infix(" % ");
                    else
                        op += cast_type + fn + "(" + input0_str + ", " + input1_str + ")";
                } else {
                    // At least one non-integer side: use the f-prefixed function
                    // and convert whichever side is an integer to float.
                    const std::string lhs = lhs_is_int ? "convert_float(" + input0_str + ")" : input0_str;
                    const std::string rhs = rhs_is_int ? "convert_float(" + input1_str + ")" : input1_str;
                    op += cast_type + "f" + fn + "(" + lhs + ", " + rhs + ")";
                }
            } break;
            case EltwiseMode::POW:
                op += cast_type + "pow(" + input0_str + ", " + input1_str + ")";
                break;
            case EltwiseMode::SQRT:
                op += cast_type + "sqrt(" + input0_str + ")";
                break;
            case EltwiseMode::RSQRT:
                op += cast_type + "1/sqrt(" + input0_str + ")";
                break;
            case EltwiseMode::SQUARED_DIFF: {
                const std::string diff = infix(" - ");
                op += cast_type + "((" + diff + ") * (" + diff + "))";
            } break;
            case EltwiseMode::EQ:
                op += cast_infix(" == ");
                break;
            case EltwiseMode::NE:
                op += cast_infix(" != ");
                break;
            case EltwiseMode::LT:
                op += cast_infix(" < ");
                break;
            case EltwiseMode::LE:
                op += cast_infix(" <= ");
                break;
            case EltwiseMode::GT:
                op += cast_infix(" > ");
                break;
            case EltwiseMode::GE:
                op += cast_infix(" >= ");
                break;
            case EltwiseMode::LOGIC_AND:
                op += cast_infix(" && ");
                break;
            case EltwiseMode::LOGIC_OR:
                op += cast_infix(" || ");
                break;
            case EltwiseMode::LOGIC_XOR:
                op += cast_type + "(!" + input0_str + " != !" + input1_str + ")";
                break;
            case EltwiseMode::FLOOR_MOD:
                op += cast_type + "(" + input0_str + " - " + input0_str + " / " + input1_str + " * " + input1_str + ")";
                break;
            case EltwiseMode::ASSIGN:
                op += input0_str;
                break;
            default:
                break;
        }

        const std::string opname = "OPERATION" + op_num_str;
        jit.AddConstant(MakeJitConstant(opname, op));
        do_eltwise += "\\\n\t" + opname + ";";
    }

    // Write the designated temporaries back into the inputs marked for update.
    for (const auto& upd : updateInputs) {
        const std::string target = std::to_string(upd.inputId);
        do_eltwise += "\\\n\tinput" + target + "[GET_INDEX(INPUT, " + target + ")] = tmp" +
                      std::to_string(upd.tmpId) + ";";
    }

    // The result is the temporary of the last operation.
    do_eltwise += "\\\n\tres = tmp" + std::to_string(operations.size() - 1) + ";";

    jit.AddConstant(MakeJitConstant("DO_ELTWISE", do_eltwise));

    if (params.layoutBased || params.int8_quantization) {
        jit.Merge(GetTensorFriendlyWorkGroupsJit(params.inputs[0]));
    }

    if (!params.stride.empty()) {
        jit.AddConstant(MakeJitConstant("INPUT_STRIDED", 1));
    }

    ///////////////
    return jit;
}
|
||||
|
||||
// Produces the kernel data for this kernel by forwarding both arguments,
// unchanged, to GetCommonKernelsData.
KernelsData EltwiseKernel_fs_bs_yx_bsv4_fsv32::GetKernelsData(const Params& params,
                                                              const optional_params& options) const {
    KernelsData kernels = GetCommonKernelsData(params, options);
    return kernels;
}
|
||||
} // namespace kernel_selector
|
@ -1,32 +0,0 @@
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "eltwise_kernel_base.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
class EltwiseKernel_fs_bs_yx_bsv4_fsv32 : public EltwiseKernelBase {
|
||||
public:
|
||||
EltwiseKernel_fs_bs_yx_bsv4_fsv32() : EltwiseKernelBase("eltwise_fs_bs_yx_bsv4_fsv32") {}
|
||||
virtual ~EltwiseKernel_fs_bs_yx_bsv4_fsv32() {}
|
||||
|
||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
|
||||
protected:
|
||||
JitConstants GetJitConstants(const eltwise_params& params) const override;
|
||||
DispatchData SetDefault(const eltwise_params& params) const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -38,7 +38,6 @@ ParamsKey EltwiseKernelRef::GetSupportedKey() const {
|
||||
k.EnableTensorOffset();
|
||||
k.EnableTensorPitches();
|
||||
k.EnableBatching();
|
||||
k.EnableInt8Quantization();
|
||||
k.EnableEltwiseStride();
|
||||
k.EnableEltwiseBroadcast();
|
||||
return k;
|
||||
|
@ -16,8 +16,6 @@
|
||||
#include "eltwise_kernel_selector.h"
|
||||
#include "eltwise_kernel_ref.h"
|
||||
#include "eltwise_kernel_vload8.h"
|
||||
#include "eltwise_kernel_fs_bs_yx_bsv4_fsv32.h"
|
||||
#include "eltwise_kernel_b_fs_yx_fsv4.h"
|
||||
#include "eltwise_kernel_fs_b_yx_fsv32.h"
|
||||
#include "eltwise_kernel_b_fs_yx_fsv16.h"
|
||||
#include "eltwise_kernel_mixed_byxf_and_fs_b_yx_fsv32.h"
|
||||
@ -26,8 +24,6 @@ namespace kernel_selector {
|
||||
eltwise_kernel_selector::eltwise_kernel_selector() {
|
||||
Attach<EltwiseKernelRef>();
|
||||
Attach<EltwiseKernel_vload8>();
|
||||
Attach<EltwiseKernel_fs_bs_yx_bsv4_fsv32>();
|
||||
Attach<EltwiseKernel_b_fs_yx_fsv4>();
|
||||
Attach<EltwiseKernel_fs_b_yx_fsv32>();
|
||||
Attach<EltwiseKernel_mixed_byxf_and_fs_b_yx_fsv32>();
|
||||
Attach<EltwiseKernel_b_fs_yx_fsv16>();
|
||||
|
@ -1,108 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "embed_kernel_ref.h"
|
||||
#include "kernel_selector_utils.h"
|
||||
#include "common_tools.h"
|
||||
#include <vector>
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
// Describes what the reference embed kernel accepts: F16/F32/INT8 for input,
// output and weights, any input layout, the bf output layout, and both the
// biased (per output / per feature) and non-biased variants.
ParamsKey EmbedKernelRef::GetSupportedKey() const {
    ParamsKey supported;

    // Input data types.
    supported.EnableInputDataType(Datatype::F16);
    supported.EnableInputDataType(Datatype::F32);
    supported.EnableInputDataType(Datatype::INT8);

    // Output data types.
    supported.EnableOutputDataType(Datatype::F16);
    supported.EnableOutputDataType(Datatype::F32);
    supported.EnableOutputDataType(Datatype::INT8);

    // Weights data types.
    supported.EnableInputWeightsType(WeightsType::F16);
    supported.EnableInputWeightsType(WeightsType::F32);
    supported.EnableInputWeightsType(WeightsType::INT8);

    // Layouts.
    supported.EnableAllInputLayout();
    supported.EnableOutputLayout(DataLayout::bf);

    // Bias handling and remaining capabilities.
    supported.EnableBiasPerOutput();
    supported.EnableBiasPerFeature();
    supported.EnableTensorOffset();
    supported.EnableTensorPitches();
    supported.EnableBatching();
    supported.EnableNonBiasTerm();

    return supported;
}
|
||||
|
||||
// Extends the WeightBiasKernelBase constants with two more:
//   INPUT0_ELEMENTS_COUNT - logical size of the first input divided by its batch,
//   NUM_OUTPUT_SIZE       - the OFM dimension of the weights.
JitConstants EmbedKernelRef::GetJitConstants(const embed_params& params) const {
    JitConstants constants = WeightBiasKernelBase::GetJitConstants(params);

    const auto& first_input = params.inputs[0];
    const auto elements_per_batch = first_input.LogicalSize() / first_input.Batch().v;
    const auto output_count = params.weights.OFM().v;

    constants.AddConstant(MakeJitConstant("INPUT0_ELEMENTS_COUNT", elements_per_batch));
    constants.AddConstant(MakeJitConstant("NUM_OUTPUT_SIZE", output_count));

    return constants;
}
|
||||
|
||||
// Builds the dispatch configuration. The global size is
// {input X, weights OFM, input batch}; the local size takes the first two
// values returned by GetOptimalLocalWorkGroupSizes and fixes the third at 1.
EmbedKernelRef::DispatchData EmbedKernelRef::SetDefault(const embed_params& params) const {
    std::vector<size_t> global_sizes = {params.inputs[0].X().v, params.weights.OFM().v, params.inputs[0].Batch().v};
    std::vector<size_t> local_sizes = GetOptimalLocalWorkGroupSizes(global_sizes, params.engineInfo);

    DispatchData dispatch;

    dispatch.gws0 = global_sizes[0];
    dispatch.gws1 = global_sizes[1];
    dispatch.gws2 = global_sizes[2];

    dispatch.lws0 = local_sizes[0];
    dispatch.lws1 = local_sizes[1];
    dispatch.lws2 = 1;  // the third suggested local size is not used

    return dispatch;
}
|
||||
|
||||
// Builds the single kernel description for an EMBED primitive.
// Returns an empty result when UpdateWeightsParams reports failure for the
// oiyx weights layout.
KernelsData EmbedKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
    assert(params.GetType() == KernelType::EMBED);

    // Dispatch sizes are computed from the caller's parameters.
    const embed_params& original = static_cast<const embed_params&>(params);
    DispatchData dispatch = SetDefault(original);

    // The remaining steps work on the parameter copy owned by the kernel data.
    KernelData kernel_data = KernelData::Default<embed_params>(params);
    embed_params& owned = *static_cast<embed_params*>(kernel_data.params.get());

    if (!UpdateWeightsParams(owned, options, WeightsLayout::oiyx, kernel_data.weightsReorderParams)) {
        return {};
    }

    auto cldnn_jit = GetJitConstants(owned);
    auto entry_point = GetEntryPoint(kernelName, owned.layerID, options);
    auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

    auto& kernel = kernel_data.kernels[0];
    const bool has_bias = !owned.bias.empty();

    FillCLKernelData(kernel, dispatch, params.engineInfo, kernelName, jit, entry_point, DEFAULT, true, has_bias);

    kernel_data.estimatedTime = dispatch.efficiency;

    return {kernel_data};
}
|
||||
|
||||
} // namespace kernel_selector
|
@ -1,42 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "weight_bias_kernel_base.h"
|
||||
#include "embed_params.h"
|
||||
#include "common_kernel_base.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// EmbedKernelRef
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class EmbedKernelRef : public WeightBiasKernelBase {
|
||||
public:
|
||||
EmbedKernelRef() : WeightBiasKernelBase("embed_ref") {}
|
||||
virtual ~EmbedKernelRef() {}
|
||||
|
||||
struct DispatchData : public CommonDispatchData {};
|
||||
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
|
||||
protected:
|
||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||
virtual JitConstants GetJitConstants(const embed_params& params) const;
|
||||
virtual DispatchData SetDefault(const embed_params& params) const;
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -1,27 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "embed_kernel_selector.h"
|
||||
#include "embed_kernel_ref.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
// Attaches EmbedKernelRef, the only implementation this selector registers.
embed_kernel_selector::embed_kernel_selector() {
    Attach<EmbedKernelRef>();
}
|
||||
|
||||
// Delegates the choice to GetNaiveBestKernel for the EMBED kernel type.
KernelsData embed_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
    KernelsData best = GetNaiveBestKernel(params, options, KernelType::EMBED);
    return best;
}
|
||||
} // namespace kernel_selector
|
@ -1,35 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "kernel_selector.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
class embed_kernel_selector : public kernel_selector_base {
|
||||
public:
|
||||
static embed_kernel_selector& Instance() {
|
||||
static embed_kernel_selector instance_;
|
||||
return instance_;
|
||||
}
|
||||
|
||||
embed_kernel_selector();
|
||||
|
||||
virtual ~embed_kernel_selector() {}
|
||||
|
||||
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -1,51 +0,0 @@
|
||||
/*
|
||||
// Copyright (c) 2016 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "weight_bias_params.h"
|
||||
#include <string>
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// embed_params
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
struct embed_params : public weight_bias_params {
|
||||
embed_params() : weight_bias_params(KernelType::EMBED) {}
|
||||
|
||||
std::string to_string() const {
|
||||
std::stringstream s;
|
||||
|
||||
s << base_params::to_string() << "_";
|
||||
if (bias.empty()) {
|
||||
s << "no_bias"
|
||||
<< "_";
|
||||
} else {
|
||||
s << "bias_" << bias[0].PhysicalSize() << "_";
|
||||
}
|
||||
return s.str();
|
||||
}
|
||||
virtual ParamsKey GetParamsKey() const { return weight_bias_params::GetParamsKey(); }
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// embed_optional_params
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
struct embed_optional_params : weight_bias_optional_params {
|
||||
embed_optional_params() : weight_bias_optional_params(KernelType::EMBED) {}
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -29,8 +29,6 @@ JitConstants FullyConnectedKernelBase::GetJitConstants(const fully_connected_par
|
||||
|
||||
jit.AddConstant(MakeJitConstant("INPUT0_ELEMENTS_COUNT", x_size));
|
||||
|
||||
jit.AddConstant(MakeJitConstant("QUANTIZATION_TERM", params.quantization != QuantizationType::NONE));
|
||||
|
||||
return jit;
|
||||
}
|
||||
|
||||
|
@ -1,122 +0,0 @@
|
||||
// Copyright (c) 2016-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include "fully_connected_kernel_mmad_batched.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
// Describes what this kernel accepts: INT8 input, output and weights, the
// fs_bs_yx_bsv4_fsv32 input layout, fs_bs_yx_bsv4_fsv32 or bf output layout,
// biased and non-biased variants, int8 quantization and output calibration.
ParamsKey FullyConnected_mmad_batched::GetSupportedKey() const {
    ParamsKey supported;

    // Data types.
    supported.EnableInputDataType(Datatype::INT8);
    supported.EnableOutputDataType(Datatype::INT8);
    supported.EnableInputWeightsType(WeightsType::INT8);

    // Layouts.
    supported.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
    supported.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
    supported.EnableOutputLayout(DataLayout::bf);

    // Bias handling.
    supported.EnableBiasPerOutput();
    supported.EnableBiasPerFeature();
    supported.EnableNonBiasTerm();

    // Remaining capabilities.
    supported.EnableTensorOffset();
    supported.EnableTensorPitches();
    supported.EnableBatching();
    supported.EnableInt8Quantization();
    supported.EnableOutputCalibration();

    return supported;
}
|
||||
|
||||
bool FullyConnected_mmad_batched::Validate(const Params& p, const optional_params& o) const {
|
||||
if (!FullyConnectedKernelBase::Validate(p, o)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto& params = static_cast<const fully_connected_params&>(p);
|
||||
|
||||
// we do not support padded input
|
||||
if (params.inputs[0].X().pad.Total() != 0 || params.inputs[0].Y().pad.Total() != 0)
|
||||
return false;
|
||||
|
||||
size_t batch = params.inputs[0].Batch().v;
|
||||
// batch must be a multiple of 8
|
||||
if (batch % 8 != 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Extends the parent constants with SUB_GROUP_SIZE (taken from runInfo.lws1),
// the weights block pitch FILTER_OFM_BLOCK_PITCH, and the input pitch/offset
// constants computed from the first input.
JitConstants FullyConnected_mmad_batched::GetJitConstants(const fully_connected_params& params,
                                                          const DispatchData& runInfo) const {
    auto constants = Parent::GetJitConstants(params, runInfo);

    constants.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws1));

    // pitch for special block format used in this kernel
    const auto& weights = params.weights;
    const size_t ifm_32_aligned = Align(weights.IFM().v, 32);
    const size_t filter_ofm_block_pitch = (ifm_32_aligned / 32) * weights.X().v * weights.Y().v * 4 * 8 * 8;
    constants.AddConstant(MakeJitConstant("FILTER_OFM_BLOCK_PITCH", filter_ofm_block_pitch));

    // Input pitches, computed from the first input only.
    const auto& first_input = params.inputs[0];
    const size_t x_pitch = 32 * 4;
    const size_t y_pitch = 32 * 4 * first_input.X().LogicalDimPadded();
    const size_t b_block_pitch = y_pitch * first_input.Y().LogicalDimPadded();
    const size_t f_block_pitch = b_block_pitch * ((first_input.Batch().v + 3) / 4);
    const size_t start_offset = x_pitch * first_input.X().pad.before + y_pitch * first_input.Y().pad.before;

    constants.AddConstant(MakeJitConstant("IN_X_PITCH", x_pitch));
    constants.AddConstant(MakeJitConstant("IN_Y_PITCH", y_pitch));
    constants.AddConstant(MakeJitConstant("IN_B_BLOCK_PITCH", b_block_pitch));
    constants.AddConstant(MakeJitConstant("IN_F_BLOCK_PITCH", f_block_pitch));
    constants.AddConstant(MakeJitConstant("IN_OFFSET", start_offset));

    return constants;
}
|
||||
|
||||
// Starts from the parent's dispatch data and overrides the work sizes:
// one work item per 8 output batches in dimension 0, and the output feature
// count rounded up to the sub-group size (8) in dimension 1.
// The unnamed int parameter is not used.
FullyConnected_mmad_batched::DispatchData FullyConnected_mmad_batched::SetDefault(const fully_connected_params& params,
                                                                                  int) const {
    constexpr size_t sub_group_size = 8;

    auto dispatch = Parent::SetDefault(params);

    const auto output_features = params.output.Feature().v;
    const size_t feature_threads = RoundUp(output_features, sub_group_size);

    dispatch.gws0 = params.output.Batch().v / 8;  // we process 8 batches in a single WG
    dispatch.gws1 = feature_threads;
    dispatch.gws2 = 1;

    dispatch.lws0 = 1;
    dispatch.lws1 = sub_group_size;
    dispatch.lws2 = 1;

    dispatch.efficiency = FORCE_PRIORITY_1;
    return dispatch;
}
|
||||
|
||||
// Collects one kernel candidate per auto-tune option.
// For every index in autoTuneOptions the tuned kernel data is requested; when the
// result is non-empty its first entry is appended to the returned list.
KernelsData FullyConnected_mmad_batched::GetKernelsData(const Params& params, const optional_params& options) const {
    KernelsData collected = {};

    for (size_t tuneIdx = 0; tuneIdx < autoTuneOptions.size(); ++tuneIdx) {
        KernelsData candidate = GetTunedKernelsDataByIndex(params,
                                                           options,
                                                           DataLayout::fs_bs_yx_bsv4_fsv32,
                                                           WeightsLayout::os_is_yx_isa8_osv8_isv4,
                                                           FORCE_PRIORITY_1,
                                                           static_cast<int>(tuneIdx));
        if (candidate.empty()) {
            continue;  // nothing produced for this tuning index
        }
        collected.emplace_back(candidate[0]);
    }

    return collected;
}
|
||||
} // namespace kernel_selector
|
@ -1,36 +0,0 @@
|
||||
// Copyright (c) 2016 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "fully_connected_kernel_base.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
class FullyConnected_mmad_batched : public FullyConnectedKernelBase {
|
||||
public:
|
||||
using Parent = FullyConnectedKernelBase;
|
||||
|
||||
FullyConnected_mmad_batched() : Parent("fully_connected_gpu_mmad_batched") {}
|
||||
|
||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
|
||||
protected:
|
||||
bool Validate(const Params& p, const optional_params& o) const override;
|
||||
JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override;
|
||||
DispatchData SetDefault(const fully_connected_params& params, int autoTuneIndex = -1) const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -28,7 +28,6 @@
|
||||
#include "fully_connected_kernel_fb_io_block.h"
|
||||
#include "fully_connected_kernel_bf_io_input_spatial.h"
|
||||
#include "fully_connected_kernel_mmad.h"
|
||||
#include "fully_connected_kernel_mmad_batched.h"
|
||||
#include "fully_connected_kernel_imad.h"
|
||||
#include "fully_connected_kernel_fs_byx_fsv32.h"
|
||||
|
||||
@ -49,7 +48,6 @@ fully_connected_kernel_selector::fully_connected_kernel_selector() {
|
||||
Attach<FullyConnected_fb_io_b8_f8>();
|
||||
Attach<FullyConnected_bf_io_input_spatial>();
|
||||
Attach<FullyConnectedKernelMMAD>();
|
||||
// Attach<FullyConnected_mmad_batched>();
|
||||
Attach<FullyConnectedKernelIMAD>();
|
||||
Attach<FullyConnected_fs_byx_fsv32>();
|
||||
}
|
||||
|
@ -1,82 +0,0 @@
|
||||
// Copyright (c) 2018-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include "fully_connected_grad_input_kernel_base.h"
|
||||
#include "kernel_selector_utils.h"
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
namespace kernel_selector {
|
||||
// Returns the JIT constants for the grad-input kernel; this forwards to
// WeightBiasKernelBase::GetJitConstants without adding anything.
JitConstants FullyConnectedGradInputKernelBase::GetJitConstants(const fully_connected_grad_input_params& params) const {
    JitConstants baseConstants = WeightBiasKernelBase::GetJitConstants(params);
    return baseConstants;
}
|
||||
|
||||
// Computes the default dispatch configuration for the grad-input kernel.
//
// Global sizes: (output batch * weights IFM, weights X, weights Y).
// Local sizes:  (largest value <= 32 that evenly divides gws0, 1, 1).
// The efficiency is set to DONT_USE_IF_HAVE_SOMETHING_ELSE.
FullyConnectedGradInputKernelBase::DispatchData FullyConnectedGradInputKernelBase::SetDefault(
    const fully_connected_grad_input_params& params) const {
    DispatchData kd;

    kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;

    const size_t gws0 = params.output.Batch().v * params.weights.IFM().v;

    // Start from min(gws0, 32) and walk down to the nearest divisor of gws0.
    // The candidate is clamped to at least 1: when gws0 is 0 the unclamped value
    // would be 0 and `gws0 % lws0` would be a modulo by zero.
    size_t lws0 = std::max<size_t>(1, std::min<size_t>(gws0, 32));
    while (gws0 % lws0 != 0) {
        --lws0;
    }

    kd.gws0 = gws0;
    kd.gws1 = params.weights.X().v;
    kd.gws2 = params.weights.Y().v;

    kd.lws0 = lws0;
    kd.lws1 = 1;
    kd.lws2 = 1;

    kd.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
    return kd;
}
|
||||
|
||||
// Builds the kernel data for a fully-connected grad-input primitive.
// Returns an empty list when the weights parameters cannot be updated to the
// WeightsLayout::oi layout; otherwise returns a single-entry list.
KernelsData FullyConnectedGradInputKernelBase::GetKernelsData(const Params& params,
                                                              const optional_params& options) const {
    assert(params.GetType() == KernelType::FULLY_CONNECTED_GRAD_INPUT);

    const auto& gradParams = static_cast<const fully_connected_grad_input_params&>(params);

    DispatchData dispatch = SetDefault(gradParams);
    KernelData kernelData = KernelData::Default<fully_connected_grad_input_params>(params);
    auto& clonedParams = *static_cast<fully_connected_grad_input_params*>(kernelData.params.get());

    // Bail out if the weights cannot be brought to the required layout.
    if (!UpdateWeightsParams(clonedParams, options, WeightsLayout::oi, kernelData.weightsReorderParams)) {
        return {};
    }

    // Note: the JIT constants and entry point are derived from the original
    // parameters, not from the cloned copy updated above.
    auto jitConstants = GetJitConstants(gradParams);
    auto entryPoint = GetEntryPoint(kernelName, gradParams.layerID, options);
    auto jitCode = CreateJit(kernelName, jitConstants, entryPoint);

    const bool hasBias = !gradParams.bias.empty();

    auto& clKernel = kernelData.kernels[0];
    FillCLKernelData(clKernel,
                     dispatch,
                     params.engineInfo,
                     kernelName,
                     jitCode,
                     entryPoint,
                     DEFAULT,
                     true,
                     hasBias);
    // An additional INPUT argument with index 1 is appended after the defaults.
    clKernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});

    kernelData.estimatedTime = dispatch.efficiency;

    return {kernelData};
}
|
||||
} // namespace kernel_selector
|
@ -1,54 +0,0 @@
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "weight_bias_kernel_base.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// fully_connected_grad_input_params
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
struct fully_connected_grad_input_params : public weight_bias_params {
|
||||
fully_connected_grad_input_params() : weight_bias_params(KernelType::FULLY_CONNECTED_GRAD_INPUT) {}
|
||||
|
||||
virtual ParamsKey GetParamsKey() const { return weight_bias_params::GetParamsKey(); }
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// fully_connected_grad_input_optional_params
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
struct fully_connected_grad_input_optional_params : weight_bias_optional_params {
|
||||
fully_connected_grad_input_optional_params()
|
||||
: weight_bias_optional_params(KernelType::FULLY_CONNECTED_GRAD_INPUT) {}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// FullyConnectedGradInputKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class FullyConnectedGradInputKernelBase : public WeightBiasKernelBase {
|
||||
public:
|
||||
using WeightBiasKernelBase::WeightBiasKernelBase;
|
||||
virtual ~FullyConnectedGradInputKernelBase() {}
|
||||
|
||||
using DispatchData = CommonDispatchData;
|
||||
|
||||
protected:
|
||||
virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const;
|
||||
virtual JitConstants GetJitConstants(const fully_connected_grad_input_params& params) const;
|
||||
virtual DispatchData SetDefault(const fully_connected_grad_input_params& params) const;
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -1,44 +0,0 @@
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include "fully_connected_grad_input_kernel_ref.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
// Describes the parameter combinations accepted by the reference grad-input kernel:
// F16/F32 for input, weights and output; yxfb/bfyx/byxf for input and output layouts;
// tensor offsets and pitches; bias per feature or no bias; batching; gradient.
// Tuning is disabled.
ParamsKey FullyConnectedGradInputKernelRef::GetSupportedKey() const {
    const Datatype dataTypes[] = {Datatype::F16, Datatype::F32};
    const WeightsType weightsTypes[] = {WeightsType::F16, WeightsType::F32};
    const DataLayout layouts[] = {DataLayout::yxfb, DataLayout::bfyx, DataLayout::byxf};

    ParamsKey key;

    for (const auto type : dataTypes) {
        key.EnableInputDataType(type);
    }
    for (const auto type : weightsTypes) {
        key.EnableInputWeightsType(type);
    }
    for (const auto type : dataTypes) {
        key.EnableOutputDataType(type);
    }
    for (const auto layout : layouts) {
        key.EnableInputLayout(layout);
    }
    for (const auto layout : layouts) {
        key.EnableOutputLayout(layout);
    }

    key.EnableTensorOffset();
    key.EnableTensorPitches();
    key.EnableBiasPerFeature();
    key.EnableNonBiasTerm();
    // TODO: add support to batching, figure out the way to update weights/biases
    // for multiple batches at the same time
    key.EnableBatching();
    key.EnableGradient();
    key.DisableTuning();

    return key;
}
|
||||
} // namespace kernel_selector
|
@ -1,29 +0,0 @@
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "fully_connected_grad_input_kernel_base.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
class FullyConnectedGradInputKernelRef : public FullyConnectedGradInputKernelBase {
|
||||
public:
|
||||
FullyConnectedGradInputKernelRef() : FullyConnectedGradInputKernelBase("fully_connected_grad_input_gpu_ref") {}
|
||||
virtual ~FullyConnectedGradInputKernelRef() {}
|
||||
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -1,28 +0,0 @@
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include "fully_connected_grad_input_kernel_selector.h"
|
||||
#include "fully_connected_grad_input_kernel_ref.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
// Attaches the only implementation referenced by this selector:
// FullyConnectedGradInputKernelRef.
fully_connected_grad_input_kernel_selector::fully_connected_grad_input_kernel_selector() {
    this->Attach<FullyConnectedGradInputKernelRef>();
}
|
||||
|
||||
// Selects kernels for the given parameters by delegating to GetNaiveBestKernel
// with KernelType::FULLY_CONNECTED_GRAD_INPUT.
KernelsData fully_connected_grad_input_kernel_selector::GetBestKernels(const Params& params,
                                                                       const optional_params& options) const {
    const auto kernelType = KernelType::FULLY_CONNECTED_GRAD_INPUT;
    return GetNaiveBestKernel(params, options, kernelType);
}
|
||||
} // namespace kernel_selector
|
@ -1,34 +0,0 @@
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "kernel_selector.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
class fully_connected_grad_input_kernel_selector : public kernel_selector_base {
|
||||
public:
|
||||
static fully_connected_grad_input_kernel_selector& Instance() {
|
||||
static fully_connected_grad_input_kernel_selector instance_;
|
||||
return instance_;
|
||||
}
|
||||
|
||||
fully_connected_grad_input_kernel_selector();
|
||||
|
||||
virtual ~fully_connected_grad_input_kernel_selector() {}
|
||||
|
||||
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -1,93 +0,0 @@
|
||||
// Copyright (c) 2018-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include "fully_connected_grad_weights_kernel_base.h"
|
||||
#include "kernel_selector_utils.h"
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
namespace kernel_selector {
|
||||
// Returns the JIT constants for the grad-weights kernel; this forwards to
// training_kernel_base::GetJitConstants without adding anything.
JitConstants FullyConnectedGradWeightsKernelBase::GetJitConstants(
    const fully_connected_grad_weights_params& params) const {
    return training_kernel_base::GetJitConstants(params);
}
|
||||
|
||||
// Computes the default dispatch configuration for the grad-weights kernel.
//
// Global sizes: (weights OFM * weights IFM, weights X, weights Y).
// Local sizes:  (largest value <= 32 that evenly divides gws0, 1, 1).
// The efficiency is set to DONT_USE_IF_HAVE_SOMETHING_ELSE.
FullyConnectedGradWeightsKernelBase::DispatchData FullyConnectedGradWeightsKernelBase::SetDefault(
    const fully_connected_grad_weights_params& params) const {
    DispatchData kd;

    kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;

    const size_t gws0 = params.weights.OFM().v * params.weights.IFM().v;

    // Start from min(gws0, 32) and walk down to the nearest divisor of gws0.
    // The candidate is clamped to at least 1: when gws0 is 0 the unclamped value
    // would be 0 and `gws0 % lws0` would be a modulo by zero.
    size_t lws0 = std::max<size_t>(1, std::min<size_t>(gws0, 32));
    while (gws0 % lws0 != 0) {
        --lws0;
    }

    kd.gws0 = gws0;
    kd.gws1 = params.weights.X().v;
    kd.gws2 = params.weights.Y().v;

    kd.lws0 = lws0;
    kd.lws1 = 1;
    kd.lws2 = 1;

    kd.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
    return kd;
}
|
||||
|
||||
// Builds the kernel data for a fully-connected grad-weights primitive.
// Returns an empty list when the weights parameters cannot be updated to the
// WeightsLayout::oi layout; otherwise returns a single-entry list.
KernelsData FullyConnectedGradWeightsKernelBase::GetKernelsData(const Params& params,
                                                                const optional_params& options) const {
    assert(params.GetType() == KernelType::FULLY_CONNECTED_GRAD_WEIGHTS);

    const auto& gradParams = static_cast<const fully_connected_grad_weights_params&>(params);

    DispatchData dispatch = SetDefault(gradParams);
    KernelData kernelData = KernelData::Default<fully_connected_grad_weights_params>(params);
    auto& clonedParams = *static_cast<fully_connected_grad_weights_params*>(kernelData.params.get());

    // Bail out if the weights cannot be brought to the required layout.
    if (!UpdateWeightsParams(clonedParams, options, WeightsLayout::oi, kernelData.weightsReorderParams)) {
        return {};
    }

    // Note: the JIT constants and entry point are derived from the original
    // parameters, not from the cloned copy updated above.
    auto jitConstants = GetJitConstants(gradParams);
    auto entryPoint = GetEntryPoint(kernelName, gradParams.layerID, options);
    auto jitCode = CreateJit(kernelName, jitConstants, entryPoint);

    const bool hasBias = !gradParams.bias.empty();

    auto& clKernel = kernelData.kernels[0];
    FillCLKernelData(clKernel,
                     dispatch,
                     params.engineInfo,
                     kernelName,
                     jitCode,
                     entryPoint,
                     DEFAULT,
                     true,
                     hasBias);

    // Extra arguments appended after the defaults, in this order:
    // previous weights gradient and (if there is a bias) previous bias gradient when
    // momentum is used, then INPUT index 1, then the learning rate.
    if (gradParams.use_momentum) {
        clKernel.arguments.push_back({ArgumentDescriptor::Types::PREV_WEIGHTS_GRADIENT, 0});
        if (hasBias) {
            clKernel.arguments.push_back({ArgumentDescriptor::Types::PREV_BIAS_GRADIENT, 0});
        }
    }
    clKernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});
    clKernel.arguments.push_back({ArgumentDescriptor::Types::LEARNING_RATE, 0});

    kernelData.estimatedTime = dispatch.efficiency;

    return {kernelData};
}
|
||||
} // namespace kernel_selector
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user