Merge remote-tracking branch 'upstream/master' into ie/jane/batch-size
This commit is contained in:
commit
6f271a4439
@ -2,12 +2,6 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
cmake_policy(SET CMP0054 NEW)
|
||||
|
||||
# TODO: for make install / package we need to use 3.13.3 version because
|
||||
# it allows to install targets created outside of current projects
|
||||
# See https://blog.kitware.com/cmake-3-13-0-available-for-download/
|
||||
|
||||
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
|
||||
|
||||
project(OpenVINO)
|
||||
|
@ -113,7 +113,7 @@
|
||||
<tab type="user" title="Atan-1" url="@ref openvino_docs_ops_arithmetic_Atan_1"/>
|
||||
<tab type="user" title="Atanh-3" url="@ref openvino_docs_ops_arithmetic_Atanh_3"/>
|
||||
<tab type="user" title="AvgPool-1" url="@ref openvino_docs_ops_pooling_AvgPool_1"/>
|
||||
<tab type="user" title="BatchNormInference-1" url="@ref openvino_docs_ops_normalization_BatchNormInference_1"/>
|
||||
<tab type="user" title="BatchNormInference-5" url="@ref openvino_docs_ops_normalization_BatchNormInference_5"/>
|
||||
<tab type="user" title="BatchToSpace-2" url="@ref openvino_docs_ops_movement_BatchToSpace_2"/>
|
||||
<tab type="user" title="BinaryConvolution-1" url="@ref openvino_docs_ops_convolution_BinaryConvolution_1"/>
|
||||
<tab type="user" title="Broadcast-1" url="@ref openvino_docs_ops_movement_Broadcast_1"/>
|
||||
|
98
docs/ops/normalization/BatchNormInference_5.md
Normal file
98
docs/ops/normalization/BatchNormInference_5.md
Normal file
@ -0,0 +1,98 @@
|
||||
## BatchNormInference <a name="BatchNormInference"></a> {#openvino_docs_ops_normalization_BatchNormInference_5}
|
||||
|
||||
**Versioned name**: *BatchNormInference-5*
|
||||
|
||||
**Category**: *Normalization*
|
||||
|
||||
**Short description**: *BatchNormInference* layer normalizes an `input` tensor by `mean` and `variance`, and applies a scale (`gamma`) to it, as well as an offset (`beta`).
|
||||
|
||||
**Attributes**:
|
||||
|
||||
* *epsilon*
|
||||
* **Description**: *epsilon* is the number to be added to the variance to avoid division by zero when normalizing a value. For example, *epsilon* equal to 0.001 means that 0.001 is added to the variance.
|
||||
* **Range of values**: a positive floating-point number
|
||||
* **Type**: `float`
|
||||
* **Default value**: None
|
||||
* **Required**: *yes*
|
||||
|
||||
**Inputs**
|
||||
|
||||
* **1**: `input` - input tensor with data for normalization. At least a 2D tensor of type T, the second dimension represents the channel axis and must have a span of at least 1. **Required.**
|
||||
* **2**: `gamma` - gamma scaling for normalized value. A 1D tensor of type T with the same span as input's channel axis. **Required.**
|
||||
* **3**: `beta` - bias added to the scaled normalized value. A 1D tensor of type T with the same span as input's channel axis. **Required.**
|
||||
* **4**: `mean` - value for mean normalization. A 1D tensor of type T with the same span as input's channel axis. **Required.**
|
||||
* **5**: `variance` - value for variance normalization. A 1D tensor of type T with the same span as input's channel axis. **Required.**
|
||||
|
||||
**Outputs**
|
||||
|
||||
* **1**: The result of normalization. A tensor of the same type and shape as the 1st input tensor.
|
||||
|
||||
**Types**
|
||||
|
||||
* *T*: any numeric type.
|
||||
|
||||
**Mathematical Formulation**
|
||||
|
||||
*BatchNormInference* normalizes the output in each hidden layer.
|
||||
* **Input**: Values of \f$b\f$ over a mini-batch:
|
||||
\f[
|
||||
\beta = \{ b_{1...m} \}
|
||||
\f]
|
||||
* **Parameters to learn**: \f$ \gamma, \beta\f$
|
||||
* **Output**:
|
||||
\f[
|
||||
\{ o_{i} = BN_{\gamma, \beta} ( b_{i} ) \}
|
||||
\f]
|
||||
* **Mini-batch mean**:
|
||||
\f[
|
||||
\mu_{\beta} \leftarrow \frac{1}{m}\sum_{i=1}^{m}b_{i}
|
||||
\f]
|
||||
* **Mini-batch variance**:
|
||||
\f[
|
||||
\sigma_{\beta }^{2}\leftarrow \frac{1}{m}\sum_{i=1}^{m} ( b_{i} - \mu_{\beta} )^{2}
|
||||
\f]
|
||||
* **Normalize**:
|
||||
\f[
|
||||
\hat{b_{i}} \leftarrow \frac{b_{i} - \mu_{\beta}}{\sqrt{\sigma_{\beta }^{2} + \epsilon }}
|
||||
\f]
|
||||
* **Scale and shift**:
|
||||
\f[
|
||||
o_{i} \leftarrow \gamma\hat{b_{i}} + \beta = BN_{\gamma ,\beta } ( b_{i} )
|
||||
\f]
|
||||
|
||||
**Example**
|
||||
|
||||
```xml
|
||||
<layer ... type="BatchNormInference" ...>
|
||||
<data epsilon="9.99e-06" />
|
||||
<input>
|
||||
<port id="0"> <!-- input -->
|
||||
<dim>1</dim>
|
||||
<dim>3</dim>
|
||||
<dim>224</dim>
|
||||
<dim>224</dim>
|
||||
</port>
|
||||
<port id="1"> <!-- gamma -->
|
||||
<dim>3</dim>
|
||||
</port>
|
||||
<port id="2"> <!-- beta -->
|
||||
<dim>3</dim>
|
||||
</port>
|
||||
<port id="3"> <!-- mean -->
|
||||
<dim>3</dim>
|
||||
</port>
|
||||
<port id="4"> <!-- variance -->
|
||||
<dim>3</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="5">
|
||||
<dim>1</dim>
|
||||
<dim>3</dim>
|
||||
<dim>224</dim>
|
||||
<dim>224</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
```
|
||||
|
@ -19,7 +19,7 @@ declared in `namespace opset5`.
|
||||
* [Atan](arithmetic/Atan_1.md)
|
||||
* [Atanh](arithmetic/Atanh_3.md)
|
||||
* [AvgPool](pooling/AvgPool_1.md)
|
||||
* [BatchNormInference](normalization/BatchNormInference_1.md)
|
||||
* [BatchNormInference](normalization/BatchNormInference_5.md)
|
||||
* [BatchToSpace](movement/BatchToSpace_2.md)
|
||||
* [BinaryConvolution](convolution/BinaryConvolution_1.md)
|
||||
* [Broadcast](movement/Broadcast_3.md)
|
||||
|
@ -4,12 +4,13 @@
|
||||
|
||||
/**
|
||||
* @brief A header file for the CNNNetworkIterator class
|
||||
*
|
||||
*
|
||||
* @file ie_cnn_network_iterator.hpp
|
||||
*/
|
||||
#pragma once
|
||||
#include <iterator>
|
||||
#include <list>
|
||||
#include <deque>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
|
||||
@ -32,25 +33,86 @@ CNNNetworkIterator {
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
|
||||
std::unordered_set<CNNLayer*> visited;
|
||||
std::list<CNNLayerPtr> nextLayersTovisit;
|
||||
std::list<CNNLayerPtr> nextLayersToVisit;
|
||||
InferenceEngine::CNNLayerPtr currentLayer;
|
||||
ICNNNetwork* network = nullptr;
|
||||
const ICNNNetwork* network = nullptr;
|
||||
|
||||
void init(const ICNNNetwork* network) {
|
||||
void init(const ICNNNetwork* net) {
|
||||
network = net;
|
||||
if (network == nullptr) THROW_IE_EXCEPTION << "ICNNNetwork object is nullptr";
|
||||
// IE_ASSERT(dynamic_cast<const details::CNNNetworkImpl*>(network) != nullptr);
|
||||
|
||||
OutputsDataMap outputs;
|
||||
network->getOutputsInfo(outputs);
|
||||
|
||||
InputsDataMap inputs;
|
||||
network->getInputsInfo(inputs);
|
||||
if (!inputs.empty()) {
|
||||
auto& nextLayers = getInputTo(inputs.begin()->second->getInputData());
|
||||
if (!nextLayers.empty()) {
|
||||
currentLayer = nextLayers.begin()->second;
|
||||
nextLayersTovisit.push_back(currentLayer);
|
||||
visited.insert(currentLayer.get());
|
||||
|
||||
auto get_consumers = [](const CNNLayerPtr& node) -> std::vector<CNNLayerPtr> {
|
||||
std::vector<CNNLayerPtr> consumers;
|
||||
for (const auto & output : node->outData) {
|
||||
for (const auto &consumer : getInputTo(output)) {
|
||||
consumers.push_back(consumer.second);
|
||||
}
|
||||
}
|
||||
return consumers;
|
||||
};
|
||||
auto bfs = [&](const CNNLayerPtr& start_node, bool traverse_via_outputs = false) {
|
||||
if (!start_node || visited.count(start_node.get())) return;
|
||||
std::deque<CNNLayerPtr> q;
|
||||
q.push_front(start_node);
|
||||
while (!q.empty()) {
|
||||
auto node = q.front();
|
||||
q.pop_front();
|
||||
if (visited.insert(node.get()).second) {
|
||||
nextLayersToVisit.push_front(node);
|
||||
}
|
||||
|
||||
// Traverse via inputs
|
||||
for (const auto & input : node->insData) {
|
||||
auto locked_input = input.lock();
|
||||
if (!locked_input) {
|
||||
THROW_IE_EXCEPTION << "insData for " << node->name << " is not valid.";
|
||||
}
|
||||
if (auto next_node = getCreatorLayer(locked_input).lock()) {
|
||||
if (!visited.count(next_node.get())) {
|
||||
// Check that all consumers were visited
|
||||
bool all_consumers_used(true);
|
||||
for (const auto & consumer : get_consumers(next_node)) {
|
||||
if (!visited.count(consumer.get())) all_consumers_used = false;
|
||||
}
|
||||
if (all_consumers_used) {
|
||||
q.push_front(next_node);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Traverse via outputs
|
||||
if (traverse_via_outputs) {
|
||||
for (const auto &consumer : get_consumers(node)) {
|
||||
if (!visited.count(consumer.get())) {
|
||||
q.push_front(consumer);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// First we run bfs starting from outputs that provides deterministic graph traverse
|
||||
for (const auto & output : outputs) {
|
||||
bfs(getCreatorLayer(output.second).lock());
|
||||
}
|
||||
|
||||
// For cases when graph has no outputs we start bfs from inputs to ensure topological sort
|
||||
for (const auto & input : inputs) {
|
||||
const auto data_ptr = input.second->getInputData();
|
||||
for (const auto & consumer : getInputTo(data_ptr))
|
||||
bfs(consumer.second, true);
|
||||
}
|
||||
currentLayer = nextLayersToVisit.front();
|
||||
}
|
||||
|
||||
|
||||
public:
|
||||
/**
|
||||
* iterator trait definitions
|
||||
@ -130,41 +192,24 @@ public:
|
||||
* @return true if the given iterator is equal to this one, false - otherwise
|
||||
*/
|
||||
bool operator==(const CNNNetworkIterator& that) const {
|
||||
return network == that.network && currentLayer == that.currentLayer;
|
||||
return currentLayer == that.currentLayer &&
|
||||
(network == that.network ||
|
||||
((network == nullptr || that.network == nullptr) && currentLayer == nullptr));
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
/**
|
||||
* @brief implementation based on BFS
|
||||
*/
|
||||
CNNLayerPtr next() {
|
||||
if (nextLayersTovisit.empty()) {
|
||||
if (nextLayersToVisit.empty()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto nextLayer = nextLayersTovisit.front();
|
||||
nextLayersTovisit.pop_front();
|
||||
nextLayersToVisit.pop_front();
|
||||
|
||||
// visit child that not visited
|
||||
for (auto&& output : nextLayer->outData) {
|
||||
for (auto&& child : getInputTo(output)) {
|
||||
if (visited.find(child.second.get()) == visited.end()) {
|
||||
nextLayersTovisit.push_back(child.second);
|
||||
visited.insert(child.second.get());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// visit parents
|
||||
for (auto&& parent : nextLayer->insData) {
|
||||
auto parentLayer = getCreatorLayer(parent.lock()).lock();
|
||||
if (parentLayer && visited.find(parentLayer.get()) == visited.end()) {
|
||||
nextLayersTovisit.push_back(parentLayer);
|
||||
visited.insert(parentLayer.get());
|
||||
}
|
||||
}
|
||||
|
||||
return nextLayersTovisit.empty() ? nullptr : nextLayersTovisit.front();
|
||||
return nextLayersToVisit.empty() ? nullptr : nextLayersToVisit.front();
|
||||
}
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
@ -732,7 +732,6 @@ void convertFunctionToICNNNetwork(const std::shared_ptr<const ::ngraph::Function
|
||||
std::make_shared<Builder::NodeConverter<::ngraph::op::Asin>>(),
|
||||
std::make_shared<Builder::NodeConverter<::ngraph::op::Atan>>(),
|
||||
std::make_shared<Builder::NodeConverter<::ngraph::op::v1::AvgPool>>(),
|
||||
std::make_shared<Builder::NodeConverter<::ngraph::op::BatchNormInference>>(),
|
||||
std::make_shared<Builder::NodeConverter<::ngraph::op::Clamp>>(),
|
||||
std::make_shared<Builder::NodeConverter<::ngraph::op::Concat>>(),
|
||||
std::make_shared<Builder::NodeConverter<::ngraph::op::Constant>>(),
|
||||
|
@ -667,12 +667,6 @@ CNNLayer::Ptr NodeConverter<ngraph::op::v1::Add>::createLayer(const std::shared_
|
||||
return res;
|
||||
}
|
||||
|
||||
template <>
|
||||
CNNLayer::Ptr NodeConverter<ngraph::op::BatchNormInference>::createLayer(
|
||||
const std::shared_ptr<ngraph::Node>& layer) const {
|
||||
THROW_IE_EXCEPTION << "BatchNormInference operation should be fused or decomposed";
|
||||
}
|
||||
|
||||
template <>
|
||||
CNNLayer::Ptr NodeConverter<ngraph::op::Squeeze>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
|
||||
LayerParams params = {layer->get_friendly_name(), "Squeeze",
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include "cnn_network_ngraph_impl.hpp"
|
||||
|
||||
#include "legacy/ie_util_internal.hpp"
|
||||
#include "legacy/cnn_network_impl.hpp"
|
||||
#include "legacy/details/ie_cnn_network_tools.h"
|
||||
#include "legacy/graph_tools.hpp"
|
||||
#include "legacy/net_pass.h"
|
||||
@ -160,8 +161,9 @@ details::CNNNetworkImplPtr cloneNet(const ICNNNetwork& origin_network) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::IELegacy, "cloneNet(ICNNNetwork)");
|
||||
std::shared_ptr<ICNNNetwork> clonedNetwork;
|
||||
// Call conversion only on the copy of nGraph function
|
||||
if (auto func = origin_network.getFunction()) {
|
||||
clonedNetwork = cloneNetwork(origin_network);
|
||||
if (origin_network.getFunction()) {
|
||||
// Copy and call conversion
|
||||
clonedNetwork = std::make_shared<InferenceEngine::details::CNNNetworkImpl>(*cloneNetwork(origin_network));
|
||||
}
|
||||
const ICNNNetwork& network = (clonedNetwork) ? *clonedNetwork : origin_network;
|
||||
|
||||
|
@ -394,7 +394,6 @@ std::shared_ptr<ngraph::Node> V10Parser::createNode(const std::vector<ngraph::Ou
|
||||
std::make_shared<LayerCreator<ngraph::op::Asin>>("Asin"),
|
||||
std::make_shared<LayerCreator<ngraph::op::Atan>>("Atan"),
|
||||
std::make_shared<LayerCreator<ngraph::op::v1::AvgPool>>("AvgPool"),
|
||||
std::make_shared<LayerCreator<ngraph::op::BatchNormInference>>("BatchNormInference"),
|
||||
std::make_shared<LayerCreator<ngraph::op::Ceiling>>("Ceiling"),
|
||||
std::make_shared<LayerCreator<ngraph::op::Clamp>>("Clamp"),
|
||||
std::make_shared<LayerCreator<ngraph::op::Concat>>("Concat"),
|
||||
@ -951,20 +950,6 @@ std::shared_ptr<ngraph::Node> V10Parser::LayerCreator<ngraph::op::v0::LSTMCell>:
|
||||
activations, activations_alpha, activations_beta, clip);
|
||||
}
|
||||
|
||||
// BatchNormInference layer
|
||||
template <>
|
||||
std::shared_ptr<ngraph::Node> V10Parser::LayerCreator<ngraph::op::BatchNormInference>::createLayer(
|
||||
const ngraph::OutputVector& inputs, const pugi::xml_node& node, std::istream& binStream,
|
||||
const GenericLayerParams& layerParsePrms) {
|
||||
checkParameters(inputs, layerParsePrms, 5);
|
||||
pugi::xml_node dn = node.child("data");
|
||||
if (dn.empty())
|
||||
THROW_IE_EXCEPTION << "Cannot read parameter for " << getType() << " layer with name: " << layerParsePrms.name;
|
||||
|
||||
float eps = GetFloatAttr(dn, "eps");
|
||||
return std::make_shared<ngraph::op::BatchNormInference>(inputs[0], inputs[1], inputs[2], inputs[3], inputs[4], eps);
|
||||
}
|
||||
|
||||
// CTCGreedyDecoder layer
|
||||
template <>
|
||||
std::shared_ptr<ngraph::Node> V10Parser::LayerCreator<ngraph::op::CTCGreedyDecoder>::createLayer(
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <ngraph/ngraph.hpp>
|
||||
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
#include <ngraph/opsets/opset5.hpp>
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -18,6 +19,7 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
|
||||
class TRANSFORMATIONS_API BatchNormDecomposition;
|
||||
class TRANSFORMATIONS_API BatchNormV5Decomposition;
|
||||
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
@ -27,3 +29,9 @@ public:
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
BatchNormDecomposition();
|
||||
};
|
||||
|
||||
/**
 * @brief Matcher pass whose constructor registers a pattern rooted at
 * opset5::BatchNormInference and replaces each match with a chain of
 * Add / Sqrt / Divide / Reshape / Subtract / Multiply / Add nodes.
 */
class ngraph::pass::BatchNormV5Decomposition: public ngraph::pass::MatcherPass {
|
||||
public:
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
BatchNormV5Decomposition();
|
||||
};
|
||||
|
@ -93,6 +93,7 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptr<ngraph::
|
||||
decomp->add_matcher<ngraph::pass::ConvertDepthToSpace>();
|
||||
decomp->add_matcher<ngraph::pass::ConvertSpaceToDepth>();
|
||||
decomp->add_matcher<ngraph::pass::BatchNormDecomposition>();
|
||||
decomp->add_matcher<ngraph::pass::BatchNormV5Decomposition>();
|
||||
decomp->set_name("ngraph::pass::CommonDecompositions");
|
||||
|
||||
// CF is required after all decompositions
|
||||
|
@ -8,8 +8,11 @@
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include <ngraph/opsets/opset5.hpp>
|
||||
#include <ngraph/rt_info.hpp>
|
||||
|
||||
using namespace ngraph;
|
||||
|
||||
NGRAPH_RTTI_DEFINITION(ngraph::pass::BatchNormDecomposition, "BatchNormDecomposition", 0);
|
||||
|
||||
ngraph::pass::BatchNormDecomposition::BatchNormDecomposition() {
|
||||
@ -43,39 +46,107 @@ ngraph::pass::BatchNormDecomposition::BatchNormDecomposition() {
|
||||
|
||||
const auto& input_type = m_input->get_element_type();
|
||||
// scale_add = variance + eps
|
||||
auto scale_add = make_shared<opset1::Add>(m_var, opset1::Constant::create(input_type, Shape{}, {m_bn->get_eps_value()}));
|
||||
auto scale_add = make_shared<opset5::Add>(m_var, opset5::Constant::create(input_type, Shape{}, {m_bn->get_eps_value()}));
|
||||
// scale = sqrt(variance + eps)
|
||||
auto scale = make_shared<opset1::Sqrt>(scale_add);
|
||||
auto scale = make_shared<opset5::Sqrt>(scale_add);
|
||||
// Divide `gamma` by `sqrt(variance + eps)`
|
||||
auto gamma_div_scale = std::make_shared<opset1::Divide>(m_gamma, scale);
|
||||
auto gamma_div_scale = std::make_shared<opset5::Divide>(m_gamma, scale);
|
||||
|
||||
size_t dims_to_add = m_input->get_shape().size() - 2;
|
||||
Shape input_aligned_shape = m_gamma->get_shape();
|
||||
for (size_t i = 0; i < dims_to_add; ++i)
|
||||
input_aligned_shape.push_back(1);
|
||||
auto new_shape = opset1::Constant::create(element::i64, Shape{input_aligned_shape.size()}, input_aligned_shape);
|
||||
auto new_shape = opset5::Constant::create(element::i64, Shape{input_aligned_shape.size()}, input_aligned_shape);
|
||||
|
||||
auto gamma_div_scale_aligned = make_shared<opset1::Reshape>(gamma_div_scale, new_shape, true);
|
||||
auto beta_aligned = make_shared<opset1::Reshape>(m_beta, new_shape, true);
|
||||
auto mean_aligned = make_shared<opset1::Reshape>(m_mean, new_shape, true);
|
||||
auto gamma_div_scale_aligned = make_shared<opset5::Reshape>(gamma_div_scale, new_shape, true);
|
||||
auto beta_aligned = make_shared<opset5::Reshape>(m_beta, new_shape, true);
|
||||
auto mean_aligned = make_shared<opset5::Reshape>(m_mean, new_shape, true);
|
||||
|
||||
// input_sub_mean = input - mean
|
||||
auto input_sub_mean = register_new_node<opset1::Subtract>(m_input, mean_aligned);
|
||||
auto input_sub_mean = register_new_node<opset5::Subtract>(m_input, mean_aligned);
|
||||
// Multiply `input - mean` and `gamma / sqrt(variance + eps)`
|
||||
auto mul = std::make_shared<opset1::Multiply>(input_sub_mean, gamma_div_scale_aligned);
|
||||
auto mul = std::make_shared<opset5::Multiply>(input_sub_mean, gamma_div_scale_aligned);
|
||||
// Add `(input - mean) * gamma / sqrt(variance + eps)` and `beta`
|
||||
auto add = std::make_shared<opset1::Add>(mul, beta_aligned);
|
||||
auto add = std::make_shared<opset5::Add>(mul, beta_aligned);
|
||||
|
||||
add->set_friendly_name(m_bn->get_friendly_name());
|
||||
|
||||
copy_runtime_info(m_bn, {scale_add, scale, gamma_div_scale, gamma_div_scale_aligned,
|
||||
beta_aligned, input_sub_mean, mul, add});
|
||||
beta_aligned, input_sub_mean, mul, add});
|
||||
|
||||
replace_node(m_bn, add);
|
||||
|
||||
return true;
|
||||
};
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(bn, "BatchNormDecomposition");
|
||||
this->register_matcher(m, callback);
|
||||
}
|
||||
|
||||
NGRAPH_RTTI_DEFINITION(ngraph::pass::BatchNormV5Decomposition, "BatchNormDecomposition", 5);
|
||||
|
||||
// Builds a pattern rooted at opset5::BatchNormInference and registers a callback
// that replaces every matched node with the equivalent expression
//   (input - mean) * (gamma / sqrt(variance + eps)) + beta
// built from opset5 Add / Sqrt / Divide / Reshape / Subtract / Multiply nodes.
ngraph::pass::BatchNormV5Decomposition::BatchNormV5Decomposition() {
|
||||
// Placeholder labels for the five BatchNormInference inputs.
// NOTE(review): the f32 type and the {2, 2, 1, 1} / {2} shapes look like
// pattern-construction placeholders only — confirm they do not restrict matching.
Shape shape{2, 2, 1, 1};
|
||||
auto input = make_shared<pattern::op::Label>(element::f32, shape);
|
||||
auto mean_shape = Shape{2};
|
||||
auto mean = make_shared<pattern::op::Label>(element::f32, mean_shape);
|
||||
auto var_shape = Shape{2};
|
||||
auto var = make_shared<pattern::op::Label>(element::f32, var_shape);
|
||||
auto gamma_shape = Shape{2};
|
||||
auto gamma = make_shared<pattern::op::Label>(element::f32, gamma_shape);
|
||||
auto beta_shape = Shape{2};
|
||||
auto beta = make_shared<pattern::op::Label>(element::f32, beta_shape);
|
||||
// Pattern root; the epsilon literal here is part of the pattern node only, the
// callback reads the real value from the matched node via get_eps_value().
auto bn = make_shared<opset5::BatchNormInference>(input, gamma, beta, mean, var, 0.001);
|
||||
|
||||
// Callback invoked for each match; returns true when the graph was modified.
ngraph::graph_rewrite_callback callback = [this, input, gamma, beta, mean, var](ngraph::pattern::Matcher &m) {
|
||||
auto pattern_map = m.get_pattern_map();
|
||||
|
||||
// Actual graph nodes bound to each pattern label.
auto m_input = pattern_map[input];
|
||||
auto m_gamma = pattern_map[gamma];
|
||||
auto m_beta = pattern_map[beta];
|
||||
auto m_mean = pattern_map[mean];
|
||||
auto m_var = pattern_map[var];
|
||||
|
||||
// TODO: check that all input shapes are static
|
||||
|
||||
// Bail out without changes if the match root is not an opset5 BatchNormInference.
auto m_bn = dynamic_pointer_cast<opset5::BatchNormInference>(m.get_match_root());
|
||||
if (!m_bn) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto& input_type = m_input->get_element_type();
|
||||
// scale_add = variance + eps
|
||||
// eps is materialised as a scalar (Shape{}) constant of the input's element type.
auto scale_add = make_shared<opset5::Add>(m_var, opset5::Constant::create(input_type, Shape{}, {m_bn->get_eps_value()}));
|
||||
// scale = sqrt(variance + eps)
|
||||
auto scale = make_shared<opset5::Sqrt>(scale_add);
|
||||
// Divide `gamma` by `sqrt(variance + eps)`
|
||||
auto gamma_div_scale = std::make_shared<opset5::Divide>(m_gamma, scale);
|
||||
|
||||
// Target shape for the per-channel tensors: gamma's shape followed by one
// trailing 1 for every input dimension beyond the first two.
// NOTE(review): get_shape() is used on input and gamma, so static shapes are
// assumed (see TODO above). dims_to_add is size_t, so an input of rank < 2
// would wrap around instead of going negative.
size_t dims_to_add = m_input->get_shape().size() - 2;
|
||||
Shape input_aligned_shape = m_gamma->get_shape();
|
||||
for (size_t i = 0; i < dims_to_add; ++i)
|
||||
input_aligned_shape.push_back(1);
|
||||
auto new_shape = opset5::Constant::create(element::i64, Shape{input_aligned_shape.size()}, input_aligned_shape);
|
||||
|
||||
// Reshape the per-channel tensors to the aligned shape.
// NOTE(review): presumably this lets them broadcast against the input along
// its channel axis in the elementwise ops below — confirm broadcast mode.
auto gamma_div_scale_aligned = make_shared<opset5::Reshape>(gamma_div_scale, new_shape, true);
|
||||
auto beta_aligned = make_shared<opset5::Reshape>(m_beta, new_shape, true);
|
||||
auto mean_aligned = make_shared<opset5::Reshape>(m_mean, new_shape, true);
|
||||
|
||||
// input_sub_mean = input - mean
|
||||
// This is the only node here created through register_new_node; the others
// are created with make_shared.
auto input_sub_mean = register_new_node<opset5::Subtract>(m_input, mean_aligned);
|
||||
// Multiply `input - mean` and `gamma / sqrt(variance + eps)`
|
||||
auto mul = std::make_shared<opset5::Multiply>(input_sub_mean, gamma_div_scale_aligned);
|
||||
// Add `(input - mean) * gamma / sqrt(variance + eps)` and `beta`
|
||||
auto add = std::make_shared<opset5::Add>(mul, beta_aligned);
|
||||
|
||||
// The final Add takes over the friendly name of the BatchNormInference it replaces.
add->set_friendly_name(m_bn->get_friendly_name());
|
||||
|
||||
// NOTE(review): mean_aligned is not in this list although the other Reshape
// nodes are — confirm whether that omission is intentional.
copy_runtime_info(m_bn, {scale_add, scale, gamma_div_scale, gamma_div_scale_aligned,
|
||||
beta_aligned, input_sub_mean, mul, add});
|
||||
|
||||
replace_node(m_bn, add);
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
// NOTE(review): the matcher is named "BatchNormDecomposition", not
// "BatchNormV5Decomposition" — confirm this is intended.
auto m = std::make_shared<ngraph::pattern::Matcher>(bn, "BatchNormDecomposition");
|
||||
this->register_matcher(m, callback);
|
||||
}
|
||||
|
@ -56,23 +56,36 @@ void PassImpl::run(const Model& model) {
|
||||
|
||||
const auto inputDimsA = inputA->desc().dims();
|
||||
|
||||
const auto K = inputDimsA[Dim::H];
|
||||
const auto M = inputDimsA[Dim::W];
|
||||
const auto batch1 = inputDimsA[Dim::N];
|
||||
const auto batch2 = inputDimsA[Dim::C];
|
||||
VPU_THROW_UNLESS(inputDimsA.size() >= 2 && inputDimsA.size() <= 4,
|
||||
"Processing layer {} with type {} failed: first inputs' ({} with usage {}) dimensions number should be in range [2, 4], but it actually has {}",
|
||||
stage->name(), stage->type(), inputA->name(), inputA->usage(), inputDimsA.size());
|
||||
|
||||
const auto inputATranspose = model->duplicateData(inputA, "@reshape", DataDesc{K, M, batch2, batch1});
|
||||
const auto perm = DimsOrder::fromNumDims(inputDimsA.size()).toPermutation();
|
||||
|
||||
std::vector<int> batchDims;
|
||||
DimValues_<Dim> permMap = { {perm[0], perm[1]}, {perm[1], perm[0]} };
|
||||
for (std::size_t i = 2; i < inputDimsA.size(); i++) {
|
||||
batchDims.push_back(inputDimsA[perm[i]]);
|
||||
permMap.set(perm[i], perm[i]);
|
||||
}
|
||||
|
||||
std::vector<int> transposedDims = {inputDimsA[perm[1]], inputDimsA[perm[0]]};
|
||||
transposedDims.insert(transposedDims.end(), batchDims.begin(), batchDims.end());
|
||||
|
||||
const auto inputATranspose = model->duplicateData(inputA, "@reshape", DataDesc{transposedDims});
|
||||
|
||||
stage->attrs().set<bool>("transposeA", false);
|
||||
model->replaceStageInput(stage->inputEdge(0), inputATranspose);
|
||||
|
||||
|
||||
|
||||
_stageBuilder->addPermuteStage(
|
||||
model,
|
||||
stage->name() + "@transpose",
|
||||
stage->origLayer(),
|
||||
inputA,
|
||||
inputATranspose,
|
||||
DimValues_<Dim>{{Dim::W, Dim::H}, {Dim::H, Dim::W}, {Dim::D, Dim::D}, {Dim::C, Dim::C}, {Dim::N, Dim::N}});
|
||||
permMap);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -99,6 +99,19 @@ void FrontEnd::parseGEMM(const Model& model, const ie::CNNLayerPtr& _layer, cons
|
||||
IE_ASSERT(inputs.size() == 2 || inputs.size() == 3);
|
||||
IE_ASSERT(outputs.size() == 1);
|
||||
|
||||
const auto input1 = inputs[0];
|
||||
const auto input2 = inputs[1];
|
||||
|
||||
VPU_THROW_UNLESS(input1->desc().numDims() >= 2 && input1->desc().numDims() <= 4,
|
||||
"Processing layer {} with type {} failed: first inputs' ({} with usage {}) dimensions number should be in range [2, 4], but it actually has {}",
|
||||
_layer->name, _layer->type, input1->name(), input1->usage(), input1->desc().numDims());
|
||||
VPU_THROW_UNLESS(input2->desc().numDims() >= 2 && input2->desc().numDims() <= 4,
|
||||
"Processing layer {} with type {} failed: second inputs' ({} with usage {}) dimensions number should be in range [2, 4], but it actually has {}",
|
||||
_layer->name, _layer->type, input2->name(), input2->usage(), input2->desc().numDims());
|
||||
VPU_THROW_UNLESS(inputs.size() < 3 || inputs[2]->desc().numDims() >= 2 && inputs[2]->desc().numDims() <= 4,
|
||||
"Processing layer {} with type {} failed: third inputs' ({} with usage {}) dimensions number should be in range [2, 4], but it actually has {}",
|
||||
_layer->name, _layer->type, inputs[2]->name(), inputs[2]->usage(), inputs[2]->desc().numDims());
|
||||
|
||||
auto layer = std::dynamic_pointer_cast<ie::GemmLayer>(_layer);
|
||||
IE_ASSERT(layer != nullptr);
|
||||
|
||||
|
@ -87,8 +87,8 @@ TEST_F(NGraphReaderTests, ReadBatchNormInferenceNetwork) {
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
<layer name="bn" id="5" type="BatchNormInference" version="opset1">
|
||||
<data eps="0.1" />
|
||||
<layer name="bn" id="5" type="BatchNormInference" version="opset5">
|
||||
<data epsilon="0.1" />
|
||||
<input>
|
||||
<port id="1" precision="FP32">
|
||||
<dim>1</dim>
|
||||
|
@ -0,0 +1,55 @@
|
||||
// Copyright (C) 2018-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <string>
|
||||
#include <legacy/ie_util_internal.hpp>
|
||||
#include "ngraph_reader_tests.hpp"
|
||||
|
||||
using namespace InferenceEngine;
|
||||
|
||||
// Reads a minimal version-10 IR consisting of a single Const layer connected to a
// Result layer, then clones the network with both cloneNetwork() and cloneNet().
// The test contains no explicit assertions; it only exercises these calls.
TEST_F(NGraphReaderTests, ReadConstantNetwork) {
|
||||
std::string model = R"V0G0N(
|
||||
<net name="Network" version="10">
|
||||
<layers>
|
||||
<layer id="0" name="constant" type="Const" version="opset1">
|
||||
<data offset="0" size="5808"/>
|
||||
<output>
|
||||
<port id="0" precision="FP32">
|
||||
<dim>1</dim>
|
||||
<dim>3</dim>
|
||||
<dim>22</dim>
|
||||
<dim>22</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
<layer name="output" type="Result" id="2" version="opset1">
|
||||
<input>
|
||||
<port id="0" precision="FP32">
|
||||
<dim>1</dim>
|
||||
<dim>3</dim>
|
||||
<dim>22</dim>
|
||||
<dim>22</dim>
|
||||
</port>
|
||||
</input>
|
||||
</layer>
|
||||
</layers>
|
||||
<edges>
|
||||
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
|
||||
</edges>
|
||||
</net>
|
||||
)V0G0N";
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
Core ie;
|
||||
Blob::Ptr weights;
|
||||
|
||||
// 5808 bytes matches the Const layer's size attribute
// (1 * 3 * 22 * 22 FP32 values, 4 bytes each). The blob is only allocated;
// this test does not write any values into it.
weights = make_shared_blob<uint8_t>(TensorDesc(Precision::U8, {5808}, Layout::C));
|
||||
weights->allocate();
|
||||
|
||||
auto network = ie.ReadNetwork(model, weights);
|
||||
// Clone the same network through both cloning entry points.
auto clonedNetwork = cloneNetwork(network);
|
||||
auto clonedNet = cloneNet(network);
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
}
|
@ -14,12 +14,8 @@ const std::vector<InferenceEngine::Precision> inputPrecisions = {
|
||||
InferenceEngine::Precision::FP32
|
||||
};
|
||||
|
||||
const std::vector<std::vector<size_t>> shapesA = {
|
||||
{1, 4, 5, 6}
|
||||
};
|
||||
|
||||
const std::vector<std::vector<size_t>> shapesB = {
|
||||
{1, 4, 6, 4}
|
||||
const std::vector<ShapeRelatedParams> shapeRelatedParams = {
|
||||
{ { {1, 4, 5, 6}, false }, { {1, 4, 6, 4}, false } }
|
||||
};
|
||||
|
||||
std::vector<ngraph::helpers::InputLayerType> secondaryInputTypes = {
|
||||
@ -27,18 +23,18 @@ std::vector<ngraph::helpers::InputLayerType> secondaryInputTypes = {
|
||||
ngraph::helpers::InputLayerType::PARAMETER,
|
||||
};
|
||||
|
||||
std::map<std::string, std::string> additional_config = {};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(smoke_MatMul, MatMulTest,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(shapeRelatedParams),
|
||||
::testing::ValuesIn(inputPrecisions),
|
||||
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||
::testing::Values(InferenceEngine::Layout::ANY),
|
||||
::testing::ValuesIn(shapesA),
|
||||
::testing::ValuesIn(shapesB),
|
||||
::testing::Values(false),
|
||||
::testing::Values(false),
|
||||
::testing::ValuesIn(secondaryInputTypes),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU)),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU),
|
||||
::testing::Values(additional_config)),
|
||||
MatMulTest::getTestCaseName);
|
||||
|
||||
} // namespace
|
||||
|
@ -36,6 +36,8 @@ std::vector<std::string> disabledTestPatterns() {
|
||||
#if (defined(_WIN32) || defined(_WIN64))
|
||||
R"(.*(CoreThreadingTestsWithIterations).*(smoke_LoadNetworkAccuracy).*)",
|
||||
#endif
|
||||
// TODO: Issue: 40957
|
||||
R"(.*(ConstantResultSubgraphTest).*)",
|
||||
// TODO: Issue: 34348
|
||||
R"(.*IEClassGetAvailableDevices.*)",
|
||||
// TODO: Issue: 25533
|
||||
|
@ -0,0 +1,17 @@
|
||||
// Copyright (C) 2019 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "subgraph_tests/constant_result.hpp"
|
||||
#include "common_test_utils/test_constants.hpp"
|
||||
|
||||
using namespace LayerTestsDefinitions;
|
||||
|
||||
namespace {
|
||||
// Instantiates ConstantResultSubgraphTest with DEVICE_CPU as its single parameter
// value; test case names are produced by ConstantResultSubgraphTest::getTestCaseName.
INSTANTIATE_TEST_CASE_P(smoke_Check, ConstantResultSubgraphTest,
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU),
|
||||
ConstantResultSubgraphTest::getTestCaseName);
|
||||
} // namespace
|
||||
|
@ -42,5 +42,7 @@ std::vector<std::string> disabledTestPatterns() {
|
||||
R"(.*IEClassHeteroExecutableNetworkGetMetricTest_TARGET_FALLBACK.*)",
|
||||
// TODO: Issue 39358
|
||||
R"(.*unaligned.*MultipleConcatTest.*)",
|
||||
// TODO: Issue: 40960
|
||||
R"(.*(ConstantResultSubgraphTest).*)",
|
||||
};
|
||||
}
|
||||
|
@ -0,0 +1,17 @@
|
||||
// Copyright (C) 2019 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "subgraph_tests/constant_result.hpp"
|
||||
#include "common_test_utils/test_constants.hpp"
|
||||
|
||||
using namespace LayerTestsDefinitions;
|
||||
|
||||
namespace {
|
||||
// Instantiates ConstantResultSubgraphTest with DEVICE_GNA as its single parameter
// value; test case names are produced by ConstantResultSubgraphTest::getTestCaseName.
INSTANTIATE_TEST_CASE_P(smoke_Check, ConstantResultSubgraphTest,
|
||||
::testing::Values(CommonTestUtils::DEVICE_GNA),
|
||||
ConstantResultSubgraphTest::getTestCaseName);
|
||||
} // namespace
|
||||
|
@ -22,5 +22,7 @@ std::vector<std::string> disabledTestPatterns() {
|
||||
// Expected behavior
|
||||
R"(.*EltwiseLayerTest.*eltwiseOpType=Pow.*netPRC=I64.*)",
|
||||
R"(.*EltwiseLayerTest.*IS=\(.*\..*\..*\..*\..*\).*eltwiseOpType=Pow.*secondaryInputType=CONSTANT.*)",
|
||||
// TODO: Issue: 40958
|
||||
R"(.*(ConstantResultSubgraphTest).*)",
|
||||
};
|
||||
}
|
||||
|
@ -0,0 +1,17 @@
|
||||
// Copyright (C) 2019 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "subgraph_tests/constant_result.hpp"
|
||||
#include "common_test_utils/test_constants.hpp"
|
||||
|
||||
using namespace LayerTestsDefinitions;
|
||||
|
||||
namespace {
|
||||
// Instantiates ConstantResultSubgraphTest for the GPU device under the
// "smoke_Check" prefix; getTestCaseName supplies the per-test name suffix.
INSTANTIATE_TEST_CASE_P(smoke_Check, ConstantResultSubgraphTest,
|
||||
::testing::Values(CommonTestUtils::DEVICE_GPU),
|
||||
ConstantResultSubgraphTest::getTestCaseName);
|
||||
} // namespace
|
||||
|
@ -0,0 +1,46 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "single_layer_tests/mat_mul.hpp"
|
||||
#include <vpu/private_plugin_config.hpp>
|
||||
|
||||
using namespace LayerTestsDefinitions;
|
||||
|
||||
namespace {
|
||||
|
||||
typedef std::map<std::string, std::string> Config;
|
||||
|
||||
const std::vector<InferenceEngine::Precision> inputPrecisions = {
|
||||
InferenceEngine::Precision::FP32
|
||||
};
|
||||
|
||||
const std::vector<ShapeRelatedParams> shapeRelatedParams = {
|
||||
{ { {1, 2, 7, 5}, true }, { {1, 2, 7, 11}, false } },
|
||||
{ { {10, 1, 1, 16}, false }, { {10, 1, 16, 1024}, false } },
|
||||
{ { {1, 5, 3}, true }, { {1, 5, 6}, false } },
|
||||
{ { {12, 8, 17}, false }, { {12, 17, 32}, false } },
|
||||
{ { {6, 128, 128}, false }, { {6, 128, 128}, false } },
|
||||
{ { {128, 384}, true }, { {128, 384}, false } },
|
||||
{ { {384, 128}, false }, { {372, 128}, true } },
|
||||
{ { {1, 2, 128, 384}, true }, { {1, 2, 128, 372}, false } },
|
||||
{ { {4, 3}, true }, { {5, 4}, true } },
|
||||
};
|
||||
|
||||
Config additionalConfig = {
|
||||
{InferenceEngine::MYRIAD_DETECT_NETWORK_BATCH, CONFIG_VALUE(NO)}
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(smoke_MatMul, MatMulTest,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(shapeRelatedParams),
|
||||
::testing::ValuesIn(inputPrecisions),
|
||||
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||
::testing::Values(InferenceEngine::Layout::ANY),
|
||||
::testing::Values(ngraph::helpers::InputLayerType::PARAMETER),
|
||||
::testing::Values(CommonTestUtils::DEVICE_MYRIAD),
|
||||
::testing::Values(additionalConfig)),
|
||||
MatMulTest::getTestCaseName);
|
||||
|
||||
} // namespace
|
@ -25,5 +25,7 @@ std::vector<std::string> disabledTestPatterns() {
|
||||
R"(.*IEClassGetAvailableDevices.*)",
|
||||
// TODO: Issue: 40473
|
||||
R"(.*TopKLayerTest.*mode=min.*sort=index.*)",
|
||||
// TODO: Issue: 40961
|
||||
R"(.*(ConstantResultSubgraphTest).*)",
|
||||
};
|
||||
}
|
||||
|
@ -0,0 +1,17 @@
|
||||
// Copyright (C) 2019 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "subgraph_tests/constant_result.hpp"
|
||||
#include "common_test_utils/test_constants.hpp"
|
||||
|
||||
using namespace LayerTestsDefinitions;
|
||||
|
||||
namespace {
|
||||
// Instantiates ConstantResultSubgraphTest for the MYRIAD device under the
// "smoke_Check" prefix; getTestCaseName supplies the per-test name suffix.
INSTANTIATE_TEST_CASE_P(smoke_Check, ConstantResultSubgraphTest,
|
||||
::testing::Values(CommonTestUtils::DEVICE_MYRIAD),
|
||||
ConstantResultSubgraphTest::getTestCaseName);
|
||||
} // namespace
|
||||
|
@ -11,17 +11,19 @@
|
||||
|
||||
#include "functional_test_utils/layer_test_utils.hpp"
|
||||
|
||||
// Shape-related MatMul test parameters. Each input is a (shape, flag) pair:
// MatMulTest reports input1.second / input2.second as transpose_a / transpose_b
// in the test name and passes them to the MatMul builder in that order.
struct ShapeRelatedParams {
|
||||
std::pair<InferenceEngine::SizeVector, bool> input1, input2;
|
||||
};
|
||||
|
||||
typedef std::tuple<
|
||||
InferenceEngine::Precision,
|
||||
InferenceEngine::Precision, // Input precision
|
||||
InferenceEngine::Precision, // Output precision
|
||||
InferenceEngine::Layout, // Input layout
|
||||
InferenceEngine::SizeVector,
|
||||
InferenceEngine::SizeVector,
|
||||
bool,
|
||||
bool,
|
||||
ngraph::helpers::InputLayerType,
|
||||
LayerTestsUtils::TargetDevice
|
||||
ShapeRelatedParams,
|
||||
InferenceEngine::Precision, // Network precision
|
||||
InferenceEngine::Precision, // Input precision
|
||||
InferenceEngine::Precision, // Output precision
|
||||
InferenceEngine::Layout, // Input layout
|
||||
ngraph::helpers::InputLayerType, // Secondary input type
|
||||
LayerTestsUtils::TargetDevice, // Device name
|
||||
std::map<std::string, std::string> // Additional network configuration
|
||||
> MatMulLayerTestParamsSet;
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
@ -29,6 +31,10 @@ namespace LayerTestsDefinitions {
|
||||
class MatMulTest : public testing::WithParamInterface<MatMulLayerTestParamsSet>, virtual public LayerTestsUtils::LayerTestsCommon {
|
||||
public:
|
||||
static std::string getTestCaseName(const testing::TestParamInfo<MatMulLayerTestParamsSet> &obj);
|
||||
static std::vector<ShapeRelatedParams> combineShapes(const std::vector<std::vector<size_t>>& firstInputShapes,
|
||||
const std::vector<std::vector<size_t>>& secondInputShapes,
|
||||
bool transposeA,
|
||||
bool transposeB);
|
||||
|
||||
protected:
|
||||
void SetUp() override;
|
||||
|
@ -0,0 +1,29 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <tuple>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#include "functional_test_utils/layer_test_utils.hpp"
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
// Parameters of ConstantResultSubgraphTest: a one-element tuple.
typedef std::tuple<
|
||||
std::string // Device name
|
||||
> constResultParams;
|
||||
|
||||
// Value-parameterized subgraph test, parameterized only by the target device name.
class ConstantResultSubgraphTest : public testing::WithParamInterface<constResultParams>,
|
||||
virtual public LayerTestsUtils::LayerTestsCommon {
|
||||
public:
|
||||
// Returns the test-case name generated from the given test parameters.
static std::string getTestCaseName(testing::TestParamInfo<constResultParams> obj);
|
||||
protected:
|
||||
// Per-test setup hook; declared here, defined in the corresponding source file.
void SetUp() override;
|
||||
};
|
||||
} // namespace LayerTestsDefinitions
|
||||
|
@ -36,4 +36,4 @@ TEST_P(AddOutputsTest, smoke_CheckOutputExist) {
|
||||
for (const auto &out : expectedOutputs) {
|
||||
ASSERT_TRUE(outputs.count(out)) << "Layer " << out << " expected to be in network outputs but it's not!";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -13,53 +13,70 @@
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
std::vector<ShapeRelatedParams> MatMulTest::combineShapes(const std::vector<std::vector<size_t>>& firstInputShapes,
|
||||
const std::vector<std::vector<size_t>>& secondInputShapes,
|
||||
bool transposeA,
|
||||
bool transposeB) {
|
||||
std::vector<ShapeRelatedParams> resVec;
|
||||
for (const auto& firstInputShape : firstInputShapes) {
|
||||
for (const auto& secondInputShape : secondInputShapes) {
|
||||
resVec.push_back(ShapeRelatedParams{ {firstInputShape, transposeA}, {secondInputShape, transposeB } });
|
||||
}
|
||||
}
|
||||
return resVec;
|
||||
}
|
||||
|
||||
// Builds the test-case name from the test parameters: both input shapes and
// their transpose flags, the secondary input type, the network/input/output
// precisions, the input layout, the target device and every entry of the
// additional plugin configuration.
std::string MatMulTest::getTestCaseName(const testing::TestParamInfo<MatMulLayerTestParamsSet> &obj) {
    InferenceEngine::Precision netPrecision;
    InferenceEngine::Precision inPrc, outPrc;
    InferenceEngine::Layout inLayout;
    ShapeRelatedParams shapeRelatedParams;
    ngraph::helpers::InputLayerType secondaryInputType;
    std::string targetDevice;
    std::map<std::string, std::string> additionalConfig;
    std::tie(shapeRelatedParams, netPrecision, inPrc, outPrc, inLayout, secondaryInputType, targetDevice, additionalConfig) =
        obj.param;

    std::ostringstream result;
    result << "IS0=" << CommonTestUtils::vec2str(shapeRelatedParams.input1.first) << "_";
    result << "IS1=" << CommonTestUtils::vec2str(shapeRelatedParams.input2.first) << "_";
    result << "transpose_a=" << shapeRelatedParams.input1.second << "_";
    result << "transpose_b=" << shapeRelatedParams.input2.second << "_";
    result << "secondaryInputType=" << secondaryInputType << "_";
    result << "netPRC=" << netPrecision.name() << "_";
    result << "inPRC=" << inPrc.name() << "_";
    result << "outPRC=" << outPrc.name() << "_";
    result << "inL=" << inLayout << "_";
    result << "trgDev=" << targetDevice;
    result << "config=(";
    // Bind by reference: iterating by value would copy a pair of strings per entry.
    for (const auto& configEntry : additionalConfig) {
        result << configEntry.first << ", " << configEntry.second << ":";
    }
    result << ")";
    return result.str();
}
|
||||
|
||||
void MatMulTest::SetUp() {
|
||||
InferenceEngine::SizeVector inputShape0;
|
||||
InferenceEngine::SizeVector inputShape1;
|
||||
bool transpose_a;
|
||||
bool transpose_b;
|
||||
ShapeRelatedParams shapeRelatedParams;
|
||||
ngraph::helpers::InputLayerType secondaryInputType;
|
||||
auto netPrecision = InferenceEngine::Precision::UNSPECIFIED;
|
||||
std::tie(netPrecision, inPrc, outPrc, inLayout, inputShape0, inputShape1, transpose_a, transpose_b, secondaryInputType, targetDevice) =
|
||||
std::map<std::string, std::string> additionalConfig;
|
||||
std::tie(shapeRelatedParams, netPrecision, inPrc, outPrc, inLayout, secondaryInputType, targetDevice, additionalConfig) =
|
||||
this->GetParam();
|
||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||
auto params = ngraph::builder::makeParams(ngPrc, {inputShape0});
|
||||
|
||||
auto secondaryInput = ngraph::builder::makeInputLayer(ngPrc, secondaryInputType, inputShape1);
|
||||
configuration.insert(additionalConfig.begin(), additionalConfig.end());
|
||||
|
||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||
auto params = ngraph::builder::makeParams(ngPrc, {shapeRelatedParams.input1.first});
|
||||
|
||||
auto secondaryInput = ngraph::builder::makeInputLayer(ngPrc, secondaryInputType, shapeRelatedParams.input2.first);
|
||||
if (secondaryInputType == ngraph::helpers::InputLayerType::PARAMETER) {
|
||||
params.push_back(std::dynamic_pointer_cast<ngraph::opset3::Parameter>(secondaryInput));
|
||||
}
|
||||
auto paramOuts = ngraph::helpers::convert2OutputVector(
|
||||
ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
|
||||
auto MatMul = std::dynamic_pointer_cast<ngraph::opset3::MatMul>(
|
||||
ngraph::builder::makeMatMul(paramOuts[0], secondaryInput, transpose_a, transpose_b));
|
||||
ngraph::builder::makeMatMul(paramOuts[0], secondaryInput, shapeRelatedParams.input1.second, shapeRelatedParams.input2.second));
|
||||
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(MatMul)};
|
||||
function = std::make_shared<ngraph::Function>(results, params, "MatMul");
|
||||
}
|
||||
|
@ -0,0 +1,36 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "subgraph_tests/constant_result.hpp"
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
// Produces the test-case name "TargetDevice=<device>" from the single
// device-name element of the parameter tuple.
std::string ConstantResultSubgraphTest::getTestCaseName(testing::TestParamInfo<constResultParams> obj) {
    const std::string& deviceName = std::get<0>(obj.param);
    return "TargetDevice=" + deviceName;
}
|
||||
|
||||
void ConstantResultSubgraphTest::SetUp() {
|
||||
InferenceEngine::SizeVector inputShapes;
|
||||
InferenceEngine::Precision inputPrecision;
|
||||
std::tie(targetDevice) = this->GetParam();
|
||||
std::vector<float> data(300);
|
||||
for (size_t i = 0; i < 300; i++)
|
||||
data[i] = i;
|
||||
|
||||
auto constant = std::make_shared<ngraph::opset5::Constant>(ngraph::element::Type_t::f32, ngraph::Shape{1, 3, 10, 10}, data);
|
||||
const ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(constant)};
|
||||
ngraph::ParameterVector params;
|
||||
function = std::make_shared<ngraph::Function>(results, params, "ConstResult");
|
||||
}
|
||||
|
||||
// Parameterized test body: the whole test is the single Run() call.
// NOTE(review): Run() is not declared in this file; presumably inherited from
// LayerTestsUtils::LayerTestsCommon — confirm.
TEST_P(ConstantResultSubgraphTest, CompareWithRefs) {
|
||||
Run();
|
||||
}
|
||||
|
||||
} // namespace LayerTestsDefinitions
|
||||
|
@ -24,7 +24,7 @@ std::shared_ptr<ngraph::Node> makeBatchNormInference(const ngraph::Output<Node>&
|
||||
std::uniform_real_distribution<float> dis(0.0, 10.0);
|
||||
std::generate(values.begin(), values.end(), [&dis, &gen]() { return dis(gen); });
|
||||
auto variance = ngraph::builder::makeConstant(ngPrc, ngraph::Shape{C}, values, !random);
|
||||
return std::make_shared<ngraph::opset4::BatchNormInference>(data, gamma, beta, mean, variance, epsilon);
|
||||
return std::make_shared<ngraph::opset5::BatchNormInference>(data, gamma, beta, mean, variance, epsilon);
|
||||
}
|
||||
} // namespace builder
|
||||
} // namespace ngraph
|
||||
|
@ -251,8 +251,8 @@ TEST_F(GraphToolsTest, canIterateOverCNNNetwork) {
|
||||
CONNECT(6, 7);
|
||||
CONNECT(7, 8);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
std::vector<CNNLayerPtr> resultedOrder;
|
||||
@ -262,24 +262,24 @@ TEST_F(GraphToolsTest, canIterateOverCNNNetwork) {
|
||||
}
|
||||
|
||||
ASSERT_EQ(resultedOrder.size(), 8);
|
||||
ASSERT_STREQ(resultedOrder[0]->name.c_str(), "2");
|
||||
ASSERT_STREQ(resultedOrder[1]->name.c_str(), "6");
|
||||
ASSERT_STREQ(resultedOrder[2]->name.c_str(), "1");
|
||||
ASSERT_STREQ(resultedOrder[3]->name.c_str(), "7");
|
||||
ASSERT_STREQ(resultedOrder[4]->name.c_str(), "3");
|
||||
ASSERT_STREQ(resultedOrder[5]->name.c_str(), "8");
|
||||
ASSERT_STREQ(resultedOrder[6]->name.c_str(), "4");
|
||||
ASSERT_STREQ(resultedOrder[0]->name.c_str(), "1");
|
||||
ASSERT_STREQ(resultedOrder[1]->name.c_str(), "3");
|
||||
ASSERT_STREQ(resultedOrder[2]->name.c_str(), "4");
|
||||
ASSERT_STREQ(resultedOrder[3]->name.c_str(), "2");
|
||||
ASSERT_STREQ(resultedOrder[4]->name.c_str(), "6");
|
||||
ASSERT_STREQ(resultedOrder[5]->name.c_str(), "7");
|
||||
ASSERT_STREQ(resultedOrder[6]->name.c_str(), "8");
|
||||
ASSERT_STREQ(resultedOrder[7]->name.c_str(), "5");
|
||||
}
|
||||
|
||||
TEST_F(GraphToolsTest, canIterateOverCNNNetworkWithCycle) {
|
||||
TEST_F(GraphToolsTest, DISABLED_canIterateOverCNNNetworkWithCycle) {
|
||||
CONNECT(1, 2);
|
||||
CONNECT(2, 3);
|
||||
CONNECT(3, 4);
|
||||
CONNECT(4, 2);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
std::vector<CNNLayerPtr> resultedOrder;
|
||||
@ -299,8 +299,8 @@ TEST_F(GraphToolsTest, canCompareCNNNetworkIterators) {
|
||||
CONNECT(1, 2);
|
||||
CONNECT(1, 3);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillOnce(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
details::CNNNetworkIterator i(wrap);
|
||||
@ -312,16 +312,16 @@ TEST_F(GraphToolsTest, canCompareCNNNetworkIterators) {
|
||||
ASSERT_EQ(i, i2);
|
||||
}
|
||||
|
||||
TEST_F(GraphToolsTest, canIterateOverEmptyNetwork) {
|
||||
TEST_F(GraphToolsTest, DISABLED_canIterateOverEmptyNetwork) {
|
||||
CONNECT(1, 2);
|
||||
CONNECT(2, 1);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillOnce(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
details::CNNNetworkIterator beg(wrap), end;
|
||||
ASSERT_EQ(beg, end);
|
||||
ASSERT_TRUE(beg == end);
|
||||
}
|
||||
|
||||
TEST_F(GraphToolsTest, CNNNetSwapLayersThrowsForNullPointers) {
|
||||
@ -333,8 +333,8 @@ TEST_F(GraphToolsTest, CNNNetSwapLayersSwapWithItself) {
|
||||
CONNECT(1, 2);
|
||||
CONNECT(2, 3);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
|
||||
@ -350,11 +350,11 @@ TEST_F(GraphToolsTest, CNNNetSwapLayersSwapWithItself) {
|
||||
ASSERT_CONNECTION(2, 3);
|
||||
}
|
||||
|
||||
TEST_F(GraphToolsTest, CNNNetSwapLayersSimpleCase_1) {
|
||||
TEST_F(GraphToolsTest, DISABLED_CNNNetSwapLayersSimpleCase_1) {
|
||||
CONNECT(1, 2);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
EXPECT_CALL(*mockNet, getLayerByName(_, _, _)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
|
||||
@ -374,8 +374,8 @@ TEST_F(GraphToolsTest, CNNNetSwapLayersSimpleCase_2) {
|
||||
CONNECT(1, 2);
|
||||
CONNECT(2, 3);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
EXPECT_CALL(*mockNet, getLayerByName(_, _, _)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
|
||||
@ -396,8 +396,8 @@ TEST_F(GraphToolsTest, CNNNetSwapLayersSimpleCase_3) {
|
||||
CONNECT(1, 2);
|
||||
CONNECT(2, 3);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
EXPECT_CALL(*mockNet, getLayerByName(_, _, _)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
|
||||
@ -422,8 +422,8 @@ TEST_F(GraphToolsTest, CNNNetSwapLayersDoesSwapDims) {
|
||||
SET_DIMS(2, {20, 1});
|
||||
SET_DIMS(3, {30, 1});
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
EXPECT_CALL(*mockNet, getLayerByName(_, _, _)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
|
||||
@ -449,8 +449,8 @@ TEST_F(GraphToolsTest, CNNNetSwapLayersSimpleCase_4) {
|
||||
CONNECT(3, 4);
|
||||
CONNECT(4, 5);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
EXPECT_CALL(*mockNet, getLayerByName(_, _, _)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
|
||||
@ -473,8 +473,8 @@ TEST_F(GraphToolsTest, CNNNetSwapLayersSplit) {
|
||||
CONNECT(1, 2);
|
||||
CONNECT(1, 3);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
EXPECT_CALL(*mockNet, getLayerByName(_, _, _)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
|
||||
@ -490,12 +490,12 @@ TEST_F(GraphToolsTest, CNNNetSwapLayersSplit) {
|
||||
ASSERT_CONNECTION(1, 2);
|
||||
ASSERT_CONNECTION(1, 3);
|
||||
}
|
||||
TEST_F(GraphToolsTest, CNNNetSwapLayersSplit_2) {
|
||||
TEST_F(GraphToolsTest, DISABLED_CNNNetSwapLayersSplit_2) {
|
||||
CONNECT(1, 2);
|
||||
CONNECT(1, 3);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
EXPECT_CALL(*mockNet, getLayerByName(_, _, _)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
|
||||
@ -519,8 +519,8 @@ TEST_F(GraphToolsTest, CNNNetSwapLayersSplit_3) {
|
||||
CONNECT(2, 4);
|
||||
CONNECT(2, 5);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
EXPECT_CALL(*mockNet, getLayerByName(_, _, _)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
|
||||
@ -547,8 +547,8 @@ TEST_F(GraphToolsTest, CNNNetSwapLayersSplit_4) {
|
||||
CONNECT(4, 2);
|
||||
CONNECT(4, 1);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
EXPECT_CALL(*mockNet, getLayerByName(_, _, _)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
|
||||
@ -577,8 +577,8 @@ TEST_F(GraphToolsTest, CanNotInsertLayerIntoNonAdjiacendLayers) {
|
||||
CONNECT(1, 2);
|
||||
CONNECT(2, 3);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
|
||||
@ -595,8 +595,8 @@ TEST_F(GraphToolsTest, CanNotInsertLayerIntoNonAdjiacendLayers) {
|
||||
TEST_F(GraphToolsTest, CNNNetworkInsertLayerSimpleCase) {
|
||||
CONNECT(1, 2);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
|
||||
@ -617,8 +617,8 @@ TEST_F(GraphToolsTest, CNNNetworkInsertLayerSimpleCaseWithMultipleOutputs) {
|
||||
CONNECT(1, 2);
|
||||
CONNECT(1, 3);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
|
||||
@ -641,8 +641,8 @@ TEST_F(GraphToolsTest, CNNNetworkInsertLayerSimpleCaseWithMultipleInputs) {
|
||||
CONNECT(1, 2);
|
||||
CONNECT(3, 2);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
|
||||
@ -665,8 +665,8 @@ TEST_F(GraphToolsTest, CNNNetworkInsertLayerSplitAndConcat) {
|
||||
CONNECT_FROM_PORT(1, 1, 2);
|
||||
CONNECT_FROM_PORT(1, 2, 3);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
|
||||
@ -689,11 +689,11 @@ TEST_F(GraphToolsTest, CNNNetworkInsertLayerSplitAndConcat) {
|
||||
}
|
||||
|
||||
|
||||
TEST_F(GraphToolsTest, CNNNetworkInsertAfterLastLayer) {
|
||||
TEST_F(GraphToolsTest, DISABLED_CNNNetworkInsertAfterLastLayer) {
|
||||
CONNECT(1, 2);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
|
||||
@ -713,8 +713,8 @@ TEST_F(GraphToolsTest, CNNNetworkInsertAfterAll) {
|
||||
CONNECT(1, 2);
|
||||
CONNECT(1, 3);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
|
||||
@ -734,8 +734,8 @@ TEST_F(GraphToolsTest, CNNNetworkInsertAllAfterSplit) {
|
||||
CONNECT_FROM_PORT(1, 0, 2);
|
||||
CONNECT_FROM_PORT(1, 1, 3);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
|
||||
@ -756,8 +756,8 @@ TEST_F(GraphToolsTest, CNNNetworkInsert1AfterSplitBeforeEltwise) {
|
||||
CONNECT_FROM_PORT(1, 1, 4);
|
||||
CONNECT(2, 4);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
|
||||
@ -779,8 +779,8 @@ TEST_F(GraphToolsTest, CNNNetworkInsert1AfterSplit) {
|
||||
CONNECT_FROM_PORT(1, 1, 3);
|
||||
CONNECT_FROM_PORT(1, 2, 4);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
|
||||
@ -802,8 +802,8 @@ TEST_F(GraphToolsTest, CNNNetworkInsertAfter2ConnectionsToEltwise) {
|
||||
CONNECT(1, 2);
|
||||
CONNECT(1, 2);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
|
||||
@ -824,8 +824,8 @@ TEST_F(GraphToolsTest, CNNNetworkRemoveNullPointerLayer) {
|
||||
CONNECT_FROM_PORT(1, 1, 3);
|
||||
CONNECT_FROM_PORT(1, 2, 4);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
|
||||
@ -842,8 +842,8 @@ TEST_F(GraphToolsTest, CNNNetworkRemoveInputOrOutputLayer) {
|
||||
CONNECT_FROM_PORT(2, 0, 3);
|
||||
CONNECT_FROM_PORT(1, 0, 3);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
|
||||
@ -863,8 +863,8 @@ TEST_F(GraphToolsTest, CNNNetworkRemoveLayerThaHas2Outputs) {
|
||||
CONNECT_FROM_PORT(1, 0, 3);
|
||||
CONNECT_FROM_PORT(5, 0, 4);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
|
||||
@ -891,8 +891,8 @@ TEST_F(GraphToolsTest, CNNNetworkRemoveLayerSplit) {
|
||||
CONNECT_FROM_PORT(1, 1, 3);
|
||||
CONNECT_FROM_PORT(2, 0, 3);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
|
||||
@ -921,8 +921,8 @@ TEST_F(GraphToolsTest, CNNNetworkRemoveLayerSplit2) {
|
||||
CONNECT_FROM_PORT(2, 0, 4);
|
||||
CONNECT_FROM_PORT(2, 0, 5);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
|
||||
@ -949,8 +949,8 @@ TEST_F(GraphToolsTest, CNNNetworkRemoveSimpleLayer) {
|
||||
CONNECT_FROM_PORT(1, 0, 2);
|
||||
CONNECT_FROM_PORT(2, 0, 3);
|
||||
|
||||
EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
|
||||
prepareInputs(maps);
|
||||
EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap & maps){
|
||||
prepareOutputs(maps);
|
||||
})));
|
||||
|
||||
EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
|
||||
|
@ -464,7 +464,7 @@ TEST(UtilTests, cloneNet_const) {
|
||||
ASSERT_EQ("custom_val3", getLayer(cloned, "input3")->params["custom_param3"]);
|
||||
}
|
||||
|
||||
TEST(UtilTests, getRootDataObjects) {
|
||||
TEST(UtilTests, DISABLED_getRootDataObjects) {
|
||||
//
|
||||
// I1-d1-L1-d7
|
||||
// \
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2016 Intel Corporation
|
||||
// Copyright (c) 2016-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -15,7 +15,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -56,10 +56,10 @@ struct activation_fuse_params : fuse_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// ActivationKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class ActivationKernelBase : public common_kernel_base {
|
||||
class ActivationKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using DispatchData = CommonDispatchData;
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
|
||||
virtual ~ActivationKernelBase() {}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
// Copyright (c) 2018-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -16,7 +16,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
@ -51,9 +51,9 @@ struct arg_max_min_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// ArgMaxMinKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class ArgMaxMinKernelBase : public common_kernel_base {
|
||||
class ArgMaxMinKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
virtual ~ArgMaxMinKernelBase() {}
|
||||
|
||||
struct DispatchData : public CommonDispatchData {
|
||||
@ -65,4 +65,4 @@ protected:
|
||||
virtual DispatchData SetDefault(const arg_max_min_params& params) const;
|
||||
KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimatedTime) const;
|
||||
};
|
||||
} // namespace kernel_selector
|
||||
} // namespace kernel_selector
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
// Copyright (c) 2018-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -16,7 +16,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
@ -44,9 +44,9 @@ struct average_unpooling_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// AverageUnpoolingKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class AverageUnpoolingKernelBase : public common_kernel_base {
|
||||
class AverageUnpoolingKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
virtual ~AverageUnpoolingKernelBase() {}
|
||||
|
||||
struct DispatchData : public CommonDispatchData {
|
||||
@ -59,4 +59,4 @@ protected:
|
||||
virtual DispatchData SetDefault(const average_unpooling_params& params) const;
|
||||
KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimatedTime) const;
|
||||
};
|
||||
} // namespace kernel_selector
|
||||
} // namespace kernel_selector
|
||||
|
@ -16,7 +16,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
#include <vector>
|
||||
|
||||
@ -46,9 +46,9 @@ struct batch_to_space_fuse_params : fuse_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// BatchToSpaceKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class BatchToSpaceKernelBase : public common_kernel_base {
|
||||
class BatchToSpaceKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
virtual ~BatchToSpaceKernelBase() {}
|
||||
|
||||
struct DispatchData : public CommonDispatchData {};
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
// Copyright (c) 2018-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -14,7 +14,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
@ -46,9 +46,9 @@ struct border_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// BorderKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class BorderKernelBase : public common_kernel_base {
|
||||
class BorderKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
|
||||
using DispatchData = CommonDispatchData;
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2018-2019 Intel Corporation
|
||||
// Copyright (c) 2018-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -14,7 +14,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
#include <vector>
|
||||
|
||||
@ -37,9 +37,9 @@ struct broadcast_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// BroadcastKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class BroadcastKernelBase : public common_kernel_base {
|
||||
class BroadcastKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
|
||||
using DispatchData = CommonDispatchData;
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2016 Intel Corporation
|
||||
// Copyright (c) 2016-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -14,7 +14,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
@ -58,9 +58,9 @@ struct concatenation_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// ConcatenationKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class ConcatenationKernelBase : public common_kernel_base {
|
||||
class ConcatenationKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
virtual ~ConcatenationKernelBase() {}
|
||||
|
||||
using DispatchData = CommonDispatchData;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
// Copyright (c) 2019 Intel Corporation
|
||||
// Copyright (c) 2019-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -20,9 +20,9 @@
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
class DeformableConvolutionKernel_bfyx_interp : public common_kernel_base {
|
||||
class DeformableConvolutionKernel_bfyx_interp : public KernelBaseOpenCL {
|
||||
public:
|
||||
DeformableConvolutionKernel_bfyx_interp() : common_kernel_base("deformable_convolution_gpu_bfyx_interp") {}
|
||||
DeformableConvolutionKernel_bfyx_interp() : KernelBaseOpenCL("deformable_convolution_gpu_bfyx_interp") {}
|
||||
virtual ~DeformableConvolutionKernel_bfyx_interp() {}
|
||||
|
||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||
|
@ -13,7 +13,7 @@
|
||||
// limitations under the License.
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
#include <string>
|
||||
|
||||
@ -37,9 +37,9 @@ struct ctc_greedy_decoder_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// CTCGreedyDecoderKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class CTCGreedyDecoderKernelBase : public common_kernel_base {
|
||||
class CTCGreedyDecoderKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
virtual ~CTCGreedyDecoderKernelBase() {}
|
||||
using DispatchData = CommonDispatchData;
|
||||
|
||||
|
@ -16,7 +16,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
@ -40,9 +40,9 @@ struct cum_sum_optional_params : optional_params {
|
||||
cum_sum_optional_params() : optional_params(KernelType::CUM_SUM) {}
|
||||
};
|
||||
|
||||
class CumSumKernelBase : public common_kernel_base {
|
||||
class CumSumKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
virtual ~CumSumKernelBase() = default;
|
||||
|
||||
struct DispatchData : public CommonDispatchData {
|
||||
|
@ -16,7 +16,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
@ -48,9 +48,9 @@ struct depth_to_space_fuse_params : fuse_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// DepthToSpaceKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class DepthToSpaceKernelBase : public common_kernel_base {
|
||||
class DepthToSpaceKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
virtual ~DepthToSpaceKernelBase() {}
|
||||
|
||||
struct DispatchData : public CommonDispatchData {
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
// Copyright (c) 2018-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -15,7 +15,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
@ -64,9 +64,9 @@ struct detection_output_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// DetectionOutputKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class DetectionOutputKernelBase : public common_kernel_base {
|
||||
class DetectionOutputKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base ::common_kernel_base;
|
||||
using KernelBaseOpenCL ::KernelBaseOpenCL;
|
||||
virtual ~DetectionOutputKernelBase() {}
|
||||
|
||||
using DispatchData = CommonDispatchData;
|
||||
@ -75,4 +75,4 @@ protected:
|
||||
JitConstants GetJitConstants(const detection_output_params& params) const;
|
||||
virtual DispatchData SetDefault(const detection_output_params& params) const;
|
||||
};
|
||||
} // namespace kernel_selector
|
||||
} // namespace kernel_selector
|
||||
|
@ -25,7 +25,12 @@ public:
|
||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
std::vector<FusedOpType> GetSupportedFusedOps() const override {
|
||||
return { FusedOpType::QUANTIZE };
|
||||
return {
|
||||
FusedOpType::QUANTIZE,
|
||||
FusedOpType::ACTIVATION,
|
||||
FusedOpType::SCALE,
|
||||
FusedOpType::ELTWISE
|
||||
};
|
||||
}
|
||||
|
||||
protected:
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2016 Intel Corporation
|
||||
// Copyright (c) 2016-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -15,7 +15,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include <vector>
|
||||
|
||||
namespace kernel_selector {
|
||||
@ -112,9 +112,9 @@ struct scale_fuse_params : fuse_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// EltwiseKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class EltwiseKernelBase : public common_kernel_base {
|
||||
class EltwiseKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
virtual ~EltwiseKernelBase() {}
|
||||
|
||||
using DispatchData = CommonDispatchData;
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2019 Intel Corporation
|
||||
// Copyright (c) 2019-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -70,9 +70,13 @@ JitConstants EltwiseKernelRef::GetJitConstants(const eltwise_params& params) con
|
||||
idx_order = {"d6", "d5", "d4", "d3", "d2", "d1"};
|
||||
}
|
||||
|
||||
FusedOpsConfiguration tensor_coord = {"_TENSOR", idx_order, "res", input_dt, 1};
|
||||
FusedOpsConfiguration linear_coord = {"_LINEAR", {"d1"}, "res", input_dt, 1, LoadType::LT_UNALIGNED, BoundaryCheck::ENABLED, IndexType::LINEAR_OFFSET};
|
||||
jit.Merge(MakeFusedOpsJitConstants(params, {tensor_coord, linear_coord}));
|
||||
if (!params.layoutBased && !params.int8_quantization && !params.broadcast && CheckInputsOutputNoPitchSameDims(params)) {
|
||||
FusedOpsConfiguration conf = {"", {"d1"}, "res", input_dt, 1, LoadType::LT_UNALIGNED, BoundaryCheck::ENABLED, IndexType::LINEAR_OFFSET};
|
||||
jit.Merge(MakeFusedOpsJitConstants(params, {conf}));
|
||||
} else {
|
||||
FusedOpsConfiguration conf = {"", idx_order, "res", input_dt, 1};
|
||||
jit.Merge(MakeFusedOpsJitConstants(params, {conf}));
|
||||
}
|
||||
}
|
||||
|
||||
return jit;
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2016 Intel Corporation
|
||||
// Copyright (c) 2016-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -26,7 +26,12 @@ public:
|
||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
std::vector<FusedOpType> GetSupportedFusedOps() const override {
|
||||
return { FusedOpType::QUANTIZE };
|
||||
return {
|
||||
FusedOpType::QUANTIZE,
|
||||
FusedOpType::ACTIVATION,
|
||||
FusedOpType::SCALE,
|
||||
FusedOpType::ELTWISE
|
||||
};
|
||||
}
|
||||
|
||||
JitConstants GetJitConstants(const eltwise_params& params) const;
|
||||
@ -34,4 +39,4 @@ public:
|
||||
protected:
|
||||
bool Validate(const Params& p, const optional_params& o) const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
||||
} // namespace kernel_selector
|
||||
|
@ -16,7 +16,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
@ -42,9 +42,9 @@ struct embedding_bag_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// EmbeddingBagKernelRef
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class EmbeddingBagKernelRef : public common_kernel_base {
|
||||
class EmbeddingBagKernelRef : public KernelBaseOpenCL {
|
||||
public:
|
||||
EmbeddingBagKernelRef() : common_kernel_base("embedding_bag_ref") {}
|
||||
EmbeddingBagKernelRef() : KernelBaseOpenCL("embedding_bag_ref") {}
|
||||
virtual ~EmbeddingBagKernelRef() = default;
|
||||
|
||||
protected:
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
#include <vector>
|
||||
@ -41,9 +41,9 @@ struct extract_image_patches_optional_params : optional_params {
|
||||
extract_image_patches_optional_params() : optional_params(KernelType::EXTRACT_IMAGE_PATCHES) {}
|
||||
};
|
||||
|
||||
class ExtractImagePatchesKernelBase : public common_kernel_base {
|
||||
class ExtractImagePatchesKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
using DispatchData = CommonDispatchData;
|
||||
virtual ~ExtractImagePatchesKernelBase() {}
|
||||
|
||||
|
@ -16,7 +16,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -37,9 +37,9 @@ struct gather_optional_params : optional_params {
|
||||
gather_optional_params() : optional_params(KernelType::GATHER) {}
|
||||
};
|
||||
|
||||
class GatherKernelRef : public common_kernel_base {
|
||||
class GatherKernelRef : public KernelBaseOpenCL {
|
||||
public:
|
||||
GatherKernelRef() : common_kernel_base("gather_ref") {}
|
||||
GatherKernelRef() : KernelBaseOpenCL("gather_ref") {}
|
||||
virtual ~GatherKernelRef() {}
|
||||
virtual JitConstants GetJitConstants(const gather_params& params) const;
|
||||
virtual CommonDispatchData SetDefault(const gather_params& params, const optional_params&) const;
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2019 Intel Corporation
|
||||
// Copyright (c) 2019-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -14,7 +14,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
@ -34,9 +34,9 @@ struct gather_tree_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// GatherTreeKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class GatherTreeKernelBase : public common_kernel_base {
|
||||
class GatherTreeKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
using DispatchData = CommonDispatchData;
|
||||
|
||||
protected:
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
@ -48,9 +48,9 @@ struct gemm_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// GemmKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class GemmKernelBase : public common_kernel_base {
|
||||
class GemmKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
using FusedOpDesc = fused_operation_desc;
|
||||
using DispatchData = CommonDispatchData;
|
||||
virtual ~GemmKernelBase() {}
|
||||
|
@ -13,7 +13,7 @@
|
||||
// limitations under the License.
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
#include <string>
|
||||
|
||||
@ -37,9 +37,9 @@ struct grn_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// GRNKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class GRNKernelBase : public common_kernel_base {
|
||||
class GRNKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
virtual ~GRNKernelBase() {}
|
||||
using DispatchData = CommonDispatchData;
|
||||
|
||||
|
@ -18,7 +18,7 @@
|
||||
|
||||
namespace kernel_selector {
|
||||
bool LRNKernelBase::Validate(const Params& p, const optional_params& o) const {
|
||||
if (!common_kernel_base::Validate(p, o) || p.GetType() != KernelType::LRN || o.GetType() != KernelType::LRN) {
|
||||
if (!KernelBaseOpenCL::Validate(p, o) || p.GetType() != KernelType::LRN || o.GetType() != KernelType::LRN) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2016 Intel Corporation
|
||||
// Copyright (c) 2016-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -15,7 +15,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
@ -52,9 +52,9 @@ struct lrn_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// LRNKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class LRNKernelBase : public common_kernel_base {
|
||||
class LRNKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
virtual ~LRNKernelBase() {}
|
||||
|
||||
using DispatchData = CommonDispatchData;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
// Copyright (c) 2016 Intel Corporation
|
||||
// Copyright (c) 2016-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -16,7 +16,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
#include <vector>
|
||||
#include <map>
|
||||
@ -82,9 +82,9 @@ struct lstm_elt_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// LSTMEltKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class LSTMEltKernelBase : public common_kernel_base {
|
||||
class LSTMEltKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
virtual ~LSTMEltKernelBase() {}
|
||||
|
||||
struct DispatchData : public CommonDispatchData {};
|
||||
@ -101,4 +101,4 @@ protected:
|
||||
return true;
|
||||
}
|
||||
};
|
||||
} // namespace kernel_selector
|
||||
} // namespace kernel_selector
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
// Copyright (c) 2018-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -16,7 +16,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
@ -71,9 +71,9 @@ struct lstm_gemm_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// LSTMGemmKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class LSTMGemmKernelBase : public common_kernel_base {
|
||||
class LSTMGemmKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
virtual ~LSTMGemmKernelBase() {}
|
||||
|
||||
struct DispatchData : public CommonDispatchData {};
|
||||
@ -90,4 +90,4 @@ protected:
|
||||
return true;
|
||||
}
|
||||
};
|
||||
} // namespace kernel_selector
|
||||
} // namespace kernel_selector
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
// Copyright (c) 2019 Intel Corporation
|
||||
// Copyright (c) 2019-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -16,7 +16,7 @@
|
||||
|
||||
#pragma once
|
||||
#include "weight_bias_params.h"
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
@ -39,9 +39,9 @@ struct lstm_dynamic_input_optional_params : weight_bias_optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// LSTM_DynamicInputKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class LSTM_DynamicInputKernelBase : public common_kernel_base {
|
||||
class LSTM_DynamicInputKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
virtual ~LSTM_DynamicInputKernelBase() {}
|
||||
|
||||
struct DispatchData : public CommonDispatchData {};
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
// Copyright (c) 2019 Intel Corporation
|
||||
// Copyright (c) 2019-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -16,7 +16,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
@ -93,9 +93,9 @@ struct lstm_dynamic_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// LSTM_DynamicTimeloopKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class LSTM_DynamicTimeloopKernelBase : public common_kernel_base {
|
||||
class LSTM_DynamicTimeloopKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
virtual ~LSTM_DynamicTimeloopKernelBase() {}
|
||||
|
||||
struct DispatchData : public CommonDispatchData {};
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
// Copyright (c) 2018-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -15,7 +15,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
@ -38,9 +38,9 @@ struct max_unpooling_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// MaxUnpoolingKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class MaxUnpoolingKernelBase : public common_kernel_base {
|
||||
class MaxUnpoolingKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
virtual ~MaxUnpoolingKernelBase() {}
|
||||
|
||||
struct DispatchData : public CommonDispatchData {
|
||||
@ -53,4 +53,4 @@ protected:
|
||||
virtual DispatchData SetDefault(const max_unpooling_params& params) const;
|
||||
KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimatedTime) const;
|
||||
};
|
||||
} // namespace kernel_selector
|
||||
} // namespace kernel_selector
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
#include <string>
|
||||
|
||||
@ -52,9 +52,9 @@ struct mvn_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// MVNKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class MVNKernelBase : public common_kernel_base {
|
||||
class MVNKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
virtual ~MVNKernelBase() {}
|
||||
|
||||
struct DispatchData : public CommonDispatchData {
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
@ -47,9 +47,9 @@ struct normalize_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// NormalizeKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class NormalizeKernelBase : public common_kernel_base {
|
||||
class NormalizeKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
virtual ~NormalizeKernelBase() {}
|
||||
|
||||
using DispatchData = CommonDispatchData;
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2019 Intel Corporation
|
||||
// Copyright (c) 2019-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -14,7 +14,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
@ -40,9 +40,9 @@ struct one_hot_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// OneHotKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class OneHotKernelBase : public common_kernel_base {
|
||||
class OneHotKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
|
||||
using DispatchData = CommonDispatchData;
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2016-2019 Intel Corporation
|
||||
// Copyright (c) 2016-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -15,7 +15,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include <vector>
|
||||
|
||||
namespace kernel_selector {
|
||||
@ -40,13 +40,13 @@ struct permute_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// PermuteKernelRef
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class PermuteKernelRef : public common_kernel_base {
|
||||
class PermuteKernelRef : public KernelBaseOpenCL {
|
||||
public:
|
||||
PermuteKernelRef() : common_kernel_base("permute_ref") {}
|
||||
PermuteKernelRef() : KernelBaseOpenCL("permute_ref") {}
|
||||
virtual ~PermuteKernelRef() {}
|
||||
|
||||
JitConstants GetJitConstants(const permute_params& params) const;
|
||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
||||
} // namespace kernel_selector
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2016 Intel Corporation
|
||||
// Copyright (c) 2016-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -15,7 +15,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
@ -54,9 +54,9 @@ struct pooling_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// PoolingKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class PoolingKernelBase : public common_kernel_base {
|
||||
class PoolingKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
virtual ~PoolingKernelBase() {}
|
||||
|
||||
struct DispatchData : public CommonDispatchData {
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2018-2019 Intel Corporation
|
||||
// Copyright (c) 2018-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -14,7 +14,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
@ -43,9 +43,9 @@ struct PyramidROIAlign_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// PyramidROIAlignKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class PyramidROIAlignKernelBase : public common_kernel_base {
|
||||
class PyramidROIAlignKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
virtual ~PyramidROIAlignKernelBase() {}
|
||||
|
||||
using DispatchData = CommonDispatchData;
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2019 Intel Corporation
|
||||
// Copyright (c) 2019-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -15,14 +15,14 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "quantize_kernel_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
class QuantizeKernelBase : public common_kernel_base {
|
||||
class QuantizeKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
virtual ~QuantizeKernelBase() {}
|
||||
|
||||
bool Validate(const Params& p, const optional_params& o) const override;
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
@ -41,9 +41,9 @@ struct reduce_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// ReduceKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class ReduceKernelBase : public common_kernel_base {
|
||||
class ReduceKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
using DispatchData = CommonDispatchData;
|
||||
|
||||
virtual ~ReduceKernelBase() {}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
// Copyright (c) 2018-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -16,7 +16,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
@ -49,9 +49,9 @@ struct region_yolo_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// RegionYoloKernelRef
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class RegionYoloKernelRef : public common_kernel_base {
|
||||
class RegionYoloKernelRef : public KernelBaseOpenCL {
|
||||
public:
|
||||
RegionYoloKernelRef() : common_kernel_base("region_yolo_gpu_ref") {}
|
||||
RegionYoloKernelRef() : KernelBaseOpenCL("region_yolo_gpu_ref") {}
|
||||
virtual ~RegionYoloKernelRef() {}
|
||||
|
||||
using DispatchData = CommonDispatchData;
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
#include <vector>
|
||||
|
||||
@ -94,9 +94,9 @@ struct reorder_weights_params : public Params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// ReorderKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class ReorderKernelBase : public common_kernel_base {
|
||||
class ReorderKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
virtual ~ReorderKernelBase() {}
|
||||
|
||||
using DispatchData = CommonDispatchData;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
// Copyright (c) 2018-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -16,7 +16,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
@ -44,9 +44,9 @@ struct reorg_yolo_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// ReorgYoloKernelRef
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class ReorgYoloKernelRef : public common_kernel_base {
|
||||
class ReorgYoloKernelRef : public KernelBaseOpenCL {
|
||||
public:
|
||||
ReorgYoloKernelRef() : common_kernel_base("reorg_yolo_gpu_ref") {}
|
||||
ReorgYoloKernelRef() : KernelBaseOpenCL("reorg_yolo_gpu_ref") {}
|
||||
virtual ~ReorgYoloKernelRef() {}
|
||||
|
||||
using DispatchData = CommonDispatchData;
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2016-2019 Intel Corporation
|
||||
// Copyright (c) 2016-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -14,7 +14,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
|
||||
#include <map>
|
||||
|
||||
@ -54,10 +54,10 @@ struct resample_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// ResampleKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class ResampleKernelBase : public common_kernel_base {
|
||||
class ResampleKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using DispatchData = CommonDispatchData;
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
|
||||
virtual ~ResampleKernelBase() {}
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2016-2019 Intel Corporation
|
||||
// Copyright (c) 2016-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -73,7 +73,7 @@ KernelsData ReshapeKernelRef::GetKernelsData(const Params& params, const optiona
|
||||
}
|
||||
|
||||
bool ReshapeKernelRef::Validate(const Params& p, const optional_params& op) const {
|
||||
if (!common_kernel_base::Validate(p, op))
|
||||
if (!KernelBaseOpenCL::Validate(p, op))
|
||||
return false;
|
||||
|
||||
const auto& rp = static_cast<const reshape_params&>(p);
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2016-2019 Intel Corporation
|
||||
// Copyright (c) 2016-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -14,7 +14,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -33,9 +33,9 @@ struct reshape_optional_params : optional_params {
|
||||
reshape_optional_params() : optional_params(KernelType::RESHAPE) {}
|
||||
};
|
||||
|
||||
class ReshapeKernelRef : public common_kernel_base {
|
||||
class ReshapeKernelRef : public KernelBaseOpenCL {
|
||||
public:
|
||||
ReshapeKernelRef() : common_kernel_base("reshape_ref") {}
|
||||
ReshapeKernelRef() : KernelBaseOpenCL("reshape_ref") {}
|
||||
virtual ~ReshapeKernelRef() {}
|
||||
|
||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2019 Intel Corporation
|
||||
// Copyright (c) 2019-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -14,7 +14,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -37,9 +37,9 @@ struct reverse_sequence_optional_params : optional_params {
|
||||
reverse_sequence_optional_params() : optional_params(KernelType::REVERSE_SEQUENCE) {}
|
||||
};
|
||||
|
||||
class ReverseSequenceKernelRef : public common_kernel_base {
|
||||
class ReverseSequenceKernelRef : public KernelBaseOpenCL {
|
||||
public:
|
||||
ReverseSequenceKernelRef() : common_kernel_base("reverse_sequence_ref") {}
|
||||
ReverseSequenceKernelRef() : KernelBaseOpenCL("reverse_sequence_ref") {}
|
||||
virtual ~ReverseSequenceKernelRef() {}
|
||||
virtual JitConstants GetJitConstants(const reverse_sequence_params& params) const;
|
||||
virtual CommonDispatchData SetDefault(const reverse_sequence_params& params, const optional_params&) const;
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2019 Intel Corporation
|
||||
// Copyright (c) 2019-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -15,7 +15,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -57,9 +57,9 @@ struct roi_pooling_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// ROIPoolingKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class ROIPoolingKernelBase : public common_kernel_base {
|
||||
class ROIPoolingKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
virtual ~ROIPoolingKernelBase() {}
|
||||
|
||||
using DispatchData = CommonDispatchData;
|
||||
|
@ -16,7 +16,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -37,9 +37,9 @@ struct scatter_update_optional_params : optional_params {
|
||||
scatter_update_optional_params() : optional_params(KernelType::SCATTER_UPDATE) {}
|
||||
};
|
||||
|
||||
class ScatterUpdateKernelRef : public common_kernel_base {
|
||||
class ScatterUpdateKernelRef : public KernelBaseOpenCL {
|
||||
public:
|
||||
ScatterUpdateKernelRef() : common_kernel_base("scatter_update_ref") {}
|
||||
ScatterUpdateKernelRef() : KernelBaseOpenCL("scatter_update_ref") {}
|
||||
virtual ~ScatterUpdateKernelRef() {}
|
||||
virtual JitConstants GetJitConstants(const scatter_update_params& params) const;
|
||||
virtual CommonDispatchData SetDefault(const scatter_update_params& params, const optional_params&, bool is_second) const;
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
// Copyright (c) 2018-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -14,7 +14,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -36,9 +36,9 @@ struct select_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// SelectKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class SelectKernelBase : public common_kernel_base {
|
||||
class SelectKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
virtual ~SelectKernelBase() {}
|
||||
|
||||
using DispatchData = CommonDispatchData;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
// Copyright (c) 2019 Intel Corporation
|
||||
// Copyright (c) 2019-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -16,7 +16,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -38,9 +38,9 @@ struct shuffle_channels_optional_params : optional_params {
|
||||
shuffle_channels_optional_params() : optional_params(KernelType::SHUFFLE_CHANNELS) {}
|
||||
};
|
||||
|
||||
class ShuffleChannelsKernelRef : public common_kernel_base {
|
||||
class ShuffleChannelsKernelRef : public KernelBaseOpenCL {
|
||||
public:
|
||||
ShuffleChannelsKernelRef() : common_kernel_base("shuffle_channels_ref") {}
|
||||
ShuffleChannelsKernelRef() : KernelBaseOpenCL("shuffle_channels_ref") {}
|
||||
virtual ~ShuffleChannelsKernelRef() {}
|
||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2016 Intel Corporation
|
||||
// Copyright (c) 2016-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -14,7 +14,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
@ -43,9 +43,9 @@ struct softmax_optional_params : optional_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// SoftmaxKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class SoftmaxKernelBase : public common_kernel_base {
|
||||
class SoftmaxKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
virtual ~SoftmaxKernelBase() {}
|
||||
|
||||
struct DispatchData : public CommonDispatchData {
|
||||
|
@ -16,7 +16,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
#include <vector>
|
||||
|
||||
@ -47,9 +47,9 @@ struct space_to_batch_fuse_params : fuse_params {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// SpaceToBatchKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class SpaceToBatchKernelBase : public common_kernel_base {
|
||||
class SpaceToBatchKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
virtual ~SpaceToBatchKernelBase() {}
|
||||
|
||||
struct DispatchData : public CommonDispatchData {};
|
||||
|
@ -16,7 +16,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
@ -40,9 +40,9 @@ struct space_to_depth_optional_params : optional_params {
|
||||
space_to_depth_optional_params() : optional_params(KernelType::SPACE_TO_DEPTH) {}
|
||||
};
|
||||
|
||||
class SpaceToDepthKernelRef : public common_kernel_base {
|
||||
class SpaceToDepthKernelRef : public KernelBaseOpenCL {
|
||||
public:
|
||||
SpaceToDepthKernelRef() : common_kernel_base("space_to_depth_ref") {}
|
||||
SpaceToDepthKernelRef() : KernelBaseOpenCL("space_to_depth_ref") {}
|
||||
virtual ~SpaceToDepthKernelRef() = default;
|
||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include <vector>
|
||||
|
||||
namespace kernel_selector {
|
||||
@ -41,9 +41,9 @@ struct strided_slice_optional_params : optional_params {
|
||||
strided_slice_optional_params() : optional_params(KernelType::STRIDED_SLICE) {}
|
||||
};
|
||||
|
||||
class StridedSliceKernelRef : public common_kernel_base {
|
||||
class StridedSliceKernelRef : public KernelBaseOpenCL {
|
||||
public:
|
||||
StridedSliceKernelRef() : common_kernel_base("strided_slice_ref") {}
|
||||
StridedSliceKernelRef() : KernelBaseOpenCL("strided_slice_ref") {}
|
||||
virtual ~StridedSliceKernelRef() {}
|
||||
virtual JitConstants GetJitConstants(const strided_slice_params& params) const;
|
||||
virtual CommonDispatchData SetDefault(const strided_slice_params& params, const optional_params&) const;
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2018 Intel Corporation
|
||||
// Copyright (c) 2018-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -14,7 +14,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -36,9 +36,9 @@ struct tile_optional_params : optional_params {
|
||||
tile_optional_params() : optional_params(KernelType::TILE) {}
|
||||
};
|
||||
|
||||
class TileKernelRef : public common_kernel_base {
|
||||
class TileKernelRef : public KernelBaseOpenCL {
|
||||
public:
|
||||
TileKernelRef() : common_kernel_base("tile_ref") {}
|
||||
TileKernelRef() : KernelBaseOpenCL("tile_ref") {}
|
||||
virtual ~TileKernelRef() {}
|
||||
|
||||
virtual JitConstants GetJitConstants(const tile_params& params) const;
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2016-2017 Intel Corporation
|
||||
// Copyright (c) 2016-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -15,18 +15,17 @@
|
||||
|
||||
#include "include/include_all.cl"
|
||||
|
||||
KERNEL(eltwise_gpu_vload8)(
|
||||
INPUTS_DECLS
|
||||
__global UNIT_TYPE* output)
|
||||
KERNEL(eltwise_gpu_vload8)(INPUTS_DECLS
|
||||
__global OUTPUT_TYPE* output)
|
||||
{
|
||||
const uint global_id = get_global_id(0);
|
||||
|
||||
VLOAD_DECLS
|
||||
|
||||
MAKE_VECTOR_TYPE(UNIT_TYPE, 8) res;
|
||||
MAKE_VECTOR_TYPE(OUTPUT_TYPE, 8) res;
|
||||
|
||||
DO_ELTWISE
|
||||
|
||||
|
||||
res = ACTIVATION(res, ACTIVATION_PARAMS);
|
||||
|
||||
vstore8(res, global_id, output);
|
||||
|
@ -57,10 +57,10 @@ KERNEL(eltwise)(
|
||||
#else
|
||||
const uint d1 = get_global_id(0);
|
||||
const uint d2 = (uint)get_global_id(1) % OUTPUT_SIZES[1];
|
||||
const uint d3 = (uint)get_global_id(1) / OUTPUT_SIZES[1] % OUTPUT_SIZE[2];
|
||||
const uint d4 = (uint)get_global_id(1) / OUTPUT_SIZES[1] / OUTPUT_SIZE[2];
|
||||
const uint d5 = (uint)get_global_id(2) % OUTPUT_SIZES[3];
|
||||
const uint d6 = (uint)get_global_id(2) / OUTPUT_SIZES[3];
|
||||
const uint d3 = (uint)get_global_id(1) / OUTPUT_SIZES[1] % OUTPUT_SIZES[2];
|
||||
const uint d4 = (uint)get_global_id(1) / OUTPUT_SIZES[1] / OUTPUT_SIZES[2];
|
||||
const uint d5 = (uint)get_global_id(2) % OUTPUT_SIZES[4];
|
||||
const uint d6 = (uint)get_global_id(2) / OUTPUT_SIZES[4];
|
||||
|
||||
uint output_offset = OUTPUT_GET_INDEX(d6, d5, d4, d3, d2, d1);
|
||||
#endif
|
||||
@ -117,13 +117,8 @@ KERNEL(eltwise)(
|
||||
DO_ELTWISE;
|
||||
|
||||
#if HAS_FUSED_OPS
|
||||
#if ELTWISE_NO_PITCH_SAME_DIMS
|
||||
FUSED_OPS_LINEAR;
|
||||
OUTPUT_TYPE out = FUSED_OPS_RESULT_LINEAR;
|
||||
#else
|
||||
FUSED_OPS_TENSOR;
|
||||
OUTPUT_TYPE out = FUSED_OPS_RESULT_TENSOR;
|
||||
#endif
|
||||
FUSED_OPS;
|
||||
OUTPUT_TYPE out = FUSED_OPS_RESULT;
|
||||
#else
|
||||
#define out res
|
||||
#endif
|
||||
|
@ -1395,6 +1395,7 @@ bool FusedOpsCodeGenerator::CanPreloadData(const FusedOpsConfiguration& conf) co
|
||||
switch (d) {
|
||||
case Tensor::DataChannelName::BATCH: can_preload &= idx.b == "0"; break;
|
||||
case Tensor::DataChannelName::FEATURE: can_preload &= idx.f == "0"; break;
|
||||
case Tensor::DataChannelName::W: can_preload &= idx.w == "0"; break;
|
||||
case Tensor::DataChannelName::Z: can_preload &= idx.z == "0"; break;
|
||||
case Tensor::DataChannelName::Y: can_preload &= idx.y == "0"; break;
|
||||
case Tensor::DataChannelName::X: can_preload &= idx.x == "0"; break;
|
||||
@ -1619,7 +1620,7 @@ JitConstants FusedOpsCodeGenerator::MakeOpJitConstants(const FusedOpsConfigurati
|
||||
case KernelType::ACTIVATION: {
|
||||
auto p = desc.GetOpParams<activation_fuse_params>();
|
||||
base_activation_params activation_p = p->param;
|
||||
op_decls += "\\\n\t" + GetOutputType(vec_size) + " " + out_var + " = " + in_var + ";";
|
||||
op_decls += "\\\n\t" + GetOutputType(vec_size) + " " + out_var + " = " + ConvertToOutputType(in_var, vec_size) + ";";
|
||||
if (activation_p.function != ActivationFunction::NONE) {
|
||||
auto suffix = "_FUSED_OP"+std::to_string(desc.op_id) + conf.suffix;
|
||||
std::string nl_m = std::to_string(activation_p.m);
|
||||
@ -1678,6 +1679,8 @@ std::string FusedOpsCodeGenerator::GetIdx(size_t input_id, idx_desc idx, bool sh
|
||||
idx_order = idx.b + "," + idx.f + "," + idx.y + "," + idx.x;
|
||||
} else if (DataTensor::ChannelsCount(desc.tensors[input_id].GetLayout()) == 5) {
|
||||
idx_order = idx.b + "," + idx.f + "," + idx.z + "," + idx.y + "," + idx.x;
|
||||
} else if (DataTensor::ChannelsCount(desc.tensors[input_id].GetLayout()) == 6) {
|
||||
idx_order = idx.b + "," + idx.f + "," + idx.w + "," + idx.z + "," + idx.y + "," + idx.x;
|
||||
}
|
||||
|
||||
if (should_be_safe) {
|
||||
|
@ -308,12 +308,13 @@ public:
|
||||
struct idx_desc {
|
||||
std::string b;
|
||||
std::string f;
|
||||
std::string w;
|
||||
std::string z;
|
||||
std::string y;
|
||||
std::string x;
|
||||
size_t dims;
|
||||
explicit idx_desc(std::vector<std::string> idx, DataTensor t)
|
||||
: b("0"), f("0"), z("0"), y("0"), x("0"), dims(0) {
|
||||
: b("0"), f("0"), w("0"), z("0"), y("0"), x("0"), dims(0) {
|
||||
dims = idx.size();
|
||||
switch (dims) {
|
||||
case 1: f = idx[0]; break;
|
||||
@ -321,7 +322,8 @@ public:
|
||||
case 3: b = idx[0]; f = idx[1]; y = idx[2]; break;
|
||||
case 4: b = idx[0]; f = idx[1]; y = idx[2]; x = idx[3]; break;
|
||||
case 5: b = idx[0]; f = idx[1]; z = idx[2]; y = idx[3]; x = idx[4]; break;
|
||||
default: throw std::runtime_error("More than 5 dimenstions is not supported in fused op generator");
|
||||
case 6: b = idx[0]; f = idx[1]; w = idx[2]; z = idx[3]; y = idx[4]; x = idx[5]; break;
|
||||
default: throw std::runtime_error("More than 6 dimenstions is not supported in fused op generator");
|
||||
}
|
||||
|
||||
if (t.Batch().v == 1) {
|
||||
@ -330,6 +332,9 @@ public:
|
||||
if (t.Feature().v == 1) {
|
||||
f = "0";
|
||||
}
|
||||
if (t.W().v == 1) {
|
||||
w = "0";
|
||||
}
|
||||
if (t.Z().v == 1) {
|
||||
z = "0";
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2016-2019 Intel Corporation
|
||||
// Copyright (c) 2016-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -12,7 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
@ -72,7 +72,7 @@ public:
|
||||
};
|
||||
} // namespace
|
||||
|
||||
std::string common_kernel_base::GetEntryPoint(const std::string& templateName,
|
||||
std::string KernelBaseOpenCL::GetEntryPoint(const std::string& templateName,
|
||||
const std::string& layerID,
|
||||
const optional_params& options) const {
|
||||
std::string kernelID = layerID;
|
||||
@ -89,7 +89,7 @@ std::string common_kernel_base::GetEntryPoint(const std::string& templateName,
|
||||
return kernelID;
|
||||
}
|
||||
|
||||
std::string common_kernel_base::CreateJit(const std::string& template_name,
|
||||
std::string KernelBaseOpenCL::CreateJit(const std::string& template_name,
|
||||
const JitConstants& constants,
|
||||
const std::string& kernel_id) const {
|
||||
class CodeBuilder code;
|
||||
@ -109,7 +109,7 @@ std::string common_kernel_base::CreateJit(const std::string& template_name,
|
||||
return jit;
|
||||
}
|
||||
|
||||
Arguments common_kernel_base::GetArgsDesc(uint32_t num_of_input,
|
||||
Arguments KernelBaseOpenCL::GetArgsDesc(uint32_t num_of_input,
|
||||
bool use_weights,
|
||||
bool use_bias,
|
||||
uint32_t number_of_inputs_for_fused_prim) const {
|
||||
@ -136,7 +136,7 @@ Arguments common_kernel_base::GetArgsDesc(uint32_t num_of_input,
|
||||
return args;
|
||||
}
|
||||
|
||||
std::shared_ptr<KernelString> common_kernel_base::GetKernelString(const std::string& name,
|
||||
std::shared_ptr<KernelString> KernelBaseOpenCL::GetKernelString(const std::string& name,
|
||||
const std::string& jit,
|
||||
const std::string& entry_point,
|
||||
const EngineInfo& engine_info,
|
||||
@ -160,7 +160,7 @@ std::shared_ptr<KernelString> common_kernel_base::GetKernelString(const std::str
|
||||
return kernel_string;
|
||||
}
|
||||
|
||||
uint32_t common_kernel_base::GetFusedPrimitiveInputsCount(const Params ¶ms) const {
|
||||
uint32_t KernelBaseOpenCL::GetFusedPrimitiveInputsCount(const Params ¶ms) const {
|
||||
auto p = dynamic_cast<const base_params&>(params);
|
||||
uint32_t fused_deps_total = 0;
|
||||
for (auto fused_op : p.fused_ops) {
|
||||
@ -170,18 +170,18 @@ uint32_t common_kernel_base::GetFusedPrimitiveInputsCount(const Params ¶ms)
|
||||
return fused_deps_total;
|
||||
}
|
||||
|
||||
void common_kernel_base::FillCLKernelData(clKernelData& kernel,
|
||||
const CommonDispatchData& dispatchData,
|
||||
const EngineInfo& engine_info,
|
||||
const std::string& kernelMapName,
|
||||
const std::string& jit,
|
||||
const std::string& entryPoint,
|
||||
const std::string& exeMode,
|
||||
bool weights,
|
||||
bool bias,
|
||||
int number_of_inputs,
|
||||
uint32_t number_of_inputs_for_fused_prims) const {
|
||||
CheckDispatchData(kernelMapName, dispatchData);
|
||||
void KernelBaseOpenCL::FillCLKernelData(clKernelData& kernel,
|
||||
const CommonDispatchData& dispatchData,
|
||||
const EngineInfo& engine_info,
|
||||
const std::string& kernelMapName,
|
||||
const std::string& jit,
|
||||
const std::string& entryPoint,
|
||||
const std::string& exeMode,
|
||||
bool weights,
|
||||
bool bias,
|
||||
int number_of_inputs,
|
||||
uint32_t number_of_inputs_for_fused_prims) const {
|
||||
KernelBase::CheckDispatchData(kernelMapName, dispatchData);
|
||||
kernel.workGroups.global = dispatchData.gws;
|
||||
kernel.workGroups.local = dispatchData.lws;
|
||||
kernel.kernelString = GetKernelString(kernelMapName, jit, entryPoint, engine_info, exeMode);
|
@ -22,10 +22,10 @@
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
class common_kernel_base : public KernelBase {
|
||||
class KernelBaseOpenCL : public KernelBase {
|
||||
public:
|
||||
using KernelBase::KernelBase;
|
||||
virtual ~common_kernel_base() {}
|
||||
virtual ~KernelBaseOpenCL() {}
|
||||
|
||||
protected:
|
||||
virtual bool Validate(const Params&, const optional_params&) const { return true; }
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
// Copyright (c) 2016 Intel Corporation
|
||||
// Copyright (c) 2016-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -16,19 +16,19 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_kernel_base.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "weight_bias_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// WeightsBiasKernelBase
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class WeightBiasKernelBase : public common_kernel_base {
|
||||
class WeightBiasKernelBase : public KernelBaseOpenCL {
|
||||
public:
|
||||
using common_kernel_base::common_kernel_base;
|
||||
using KernelBaseOpenCL::KernelBaseOpenCL;
|
||||
virtual ~WeightBiasKernelBase() {}
|
||||
|
||||
protected:
|
||||
virtual JitConstants GetJitConstants(const weight_bias_params& params) const;
|
||||
};
|
||||
} // namespace kernel_selector
|
||||
} // namespace kernel_selector
|
||||
|
@ -171,6 +171,7 @@ attach_eltwise_gpu::attach_eltwise_gpu() {
|
||||
{ std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), eltwise_gpu::create },
|
||||
{ std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), eltwise_gpu::create },
|
||||
{ std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), eltwise_gpu::create },
|
||||
{ std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), eltwise_gpu::create },
|
||||
{ std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_zyx_fsv16), eltwise_gpu::create },
|
||||
{ std::make_tuple(engine_types::ocl, data_types::i64, format::b_fs_zyx_fsv16), eltwise_gpu::create },
|
||||
|
||||
|
@ -423,6 +423,10 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) {
|
||||
|
||||
should_fuse |= input_data.is_type<reduce>() && reduce_supports_fusings(input_data.as<reduce>());
|
||||
|
||||
should_fuse |= input_data.is_type<scale>();
|
||||
|
||||
should_fuse |= input_data.is_type<eltwise>();
|
||||
|
||||
if (!should_fuse)
|
||||
return;
|
||||
|
||||
@ -478,6 +482,10 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) {
|
||||
|
||||
should_fuse |= input_data.is_type<reduce>() && reduce_supports_fusings(input_data.as<reduce>());
|
||||
|
||||
should_fuse |= input_data.is_type<scale>();
|
||||
|
||||
should_fuse |= input_data.is_type<eltwise>();
|
||||
|
||||
if (!should_fuse)
|
||||
return;
|
||||
|
||||
@ -562,12 +570,14 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) {
|
||||
|
||||
should_fuse |= input_data.is_type<space_to_batch>() && quantize_node.get_scale_shift_opt();
|
||||
|
||||
should_fuse |= input_data.is_type<eltwise>() && quantize_node.get_scale_shift_opt();
|
||||
|
||||
should_fuse |= input_data.is_type<reduce>() &&
|
||||
reduce_supports_fusings(input_data.as<reduce>())
|
||||
&& quantize_node.get_scale_shift_opt();
|
||||
|
||||
should_fuse |= input_data.is_type<eltwise>() && quantize_node.get_scale_shift_opt();
|
||||
|
||||
should_fuse |= input_data.is_type<scale>() && quantize_node.get_scale_shift_opt();
|
||||
|
||||
if (!should_fuse)
|
||||
return;
|
||||
|
||||
@ -589,31 +599,26 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) {
|
||||
std::vector<cldnn::program_node*> parents = node.get_dependencies();
|
||||
std::list<cldnn::program_node*> users = node.get_users();
|
||||
|
||||
std::vector<bool> can_fuse_parents = { false, false };
|
||||
|
||||
for (size_t i = 0; i < parents.size(); i++) {
|
||||
can_fuse_parents[i] = (parents[i]->is_type<convolution>() && conv_supports_fusings(parents[i]->as<convolution>())) ||
|
||||
(parents[i]->is_type<mvn>() && mvn_supports_fusings(parents[i]->as<mvn>())) ||
|
||||
(parents[i]->is_type<deconvolution>()) ||
|
||||
(parents[i]->is_type<permute>()) ||
|
||||
(parents[i]->is_type<space_to_depth>()) ||
|
||||
(parents[i]->is_type<gemm>() && gemm_supports_fusings(parents[i]->as<gemm>())) ||
|
||||
(parents[i]->is_type<batch_to_space>()) ||
|
||||
(parents[i]->is_type<space_to_batch>()) ||
|
||||
(parents[i]->is_type<eltwise>()) ||
|
||||
(parents[i]->is_type<scale>()) ||
|
||||
(parents[i]->is_type<depth_to_space>() && dts_supports_fusings(parents[i]->as<depth_to_space>())) ||
|
||||
(parents[i]->is_type<reduce>() && reduce_supports_fusings(parents[i]->as<reduce>()));
|
||||
}
|
||||
|
||||
auto parent1 = parents[0];
|
||||
auto parent2 = parents[1];
|
||||
|
||||
bool can_fuse_parent1 = (parent1->is_type<convolution>() && conv_supports_fusings(parent1->as<convolution>())) ||
|
||||
(parent1->is_type<mvn>() && mvn_supports_fusings(parent1->as<mvn>())) ||
|
||||
(parent1->is_type<deconvolution>()) || (parent1->is_type<permute>()) ||
|
||||
(parent1->is_type<space_to_depth>()) ||
|
||||
(parent1->is_type<gemm>() && gemm_supports_fusings(parent1->as<gemm>())) ||
|
||||
(parent1->is_type<batch_to_space>()) || (parent1->is_type<space_to_batch>()) ||
|
||||
(parent1->is_type<depth_to_space>() && dts_supports_fusings(parent1->as<depth_to_space>())) ||
|
||||
(parent1->is_type<batch_to_space>()) || (parent1->is_type<space_to_batch>()) ||
|
||||
(parent1->is_type<reduce>() && reduce_supports_fusings(parent1->as<reduce>()));
|
||||
|
||||
bool can_fuse_parent2 = (parent2->is_type<convolution>() && conv_supports_fusings(parent2->as<convolution>())) ||
|
||||
(parent2->is_type<mvn>() && mvn_supports_fusings(parent2->as<mvn>())) ||
|
||||
(parent2->is_type<deconvolution>()) || (parent2->is_type<permute>()) ||
|
||||
(parent2->is_type<space_to_depth>()) ||
|
||||
(parent2->is_type<gemm>() && gemm_supports_fusings(parent2->as<gemm>())) ||
|
||||
(parent2->is_type<batch_to_space>()) || (parent2->is_type<space_to_batch>()) ||
|
||||
(parent2->is_type<depth_to_space>() && dts_supports_fusings(parent2->as<depth_to_space>())) ||
|
||||
(parent2->is_type<batch_to_space>()) || (parent2->is_type<space_to_batch>()) ||
|
||||
(parent2->is_type<reduce>() && reduce_supports_fusings(parent2->as<reduce>()));
|
||||
|
||||
std::vector<bool> can_fuse_parents = { can_fuse_parent1, can_fuse_parent2 };
|
||||
|
||||
auto p1_raw_size = parent1->get_output_layout().size.sizes();
|
||||
auto p2_raw_size = parent2->get_output_layout().size.sizes();
|
||||
for (unsigned k = 0; k < p1_raw_size.size(); k++) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
// Copyright (c) 2016 Intel Corporation
|
||||
// Copyright (c) 2016-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -39,7 +39,7 @@ public:
|
||||
program_node& scale_in() const { return get_dependency(1); }
|
||||
program_node& bias() const { return get_dependency(2); }
|
||||
|
||||
bool bias_term() const { return get_dependencies().size() > 2; }
|
||||
bool bias_term() const { return get_primitive()->bias.length() != 0; }
|
||||
|
||||
std::shared_ptr<kernel_selector::fuse_params> get_fuse_params() const override {
|
||||
return std::make_shared<kernel_selector::scale_fuse_params>();
|
||||
@ -62,7 +62,7 @@ public:
|
||||
memory_impl& scale_memory() const { return dep_memory(1); }
|
||||
memory_impl& bias_memory() const { return dep_memory(2); }
|
||||
|
||||
bool bias_term() const { return _deps.size() > 2; }
|
||||
bool bias_term() const { return _node.as<scale>().bias_term(); }
|
||||
};
|
||||
|
||||
using scale_inst = typed_primitive_inst<scale>;
|
||||
|
@ -49,6 +49,10 @@ layout scale_inst::calc_output_layout(scale_node const& node) {
|
||||
if (desc->output_data_type)
|
||||
result.data_type = *desc->output_data_type;
|
||||
|
||||
if (node.has_fused_primitives()) {
|
||||
result.data_type = node.get_fused_output_layout().data_type;
|
||||
}
|
||||
|
||||
if (scale_x_size != 1) {
|
||||
CLDNN_ERROR_NOT_EQUAL(node.id(), "Scale x size", scale_x_size, "input x size", input_x_size, "");
|
||||
}
|
||||
|
@ -4314,23 +4314,23 @@ TEST_P(deconv_scale_actv_quant_u8_eltw_scale_actv_quant_i8, basic) {
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_actv_quant_u8_eltw_scale_actv_quant_i8,
|
||||
::testing::ValuesIn(std::vector<deconv_test_params>{
|
||||
deconv_test_params{ CASE_DECONV_FP32_1, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_2, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_3, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_4, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_5, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_6, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_7, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_8, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_1, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_2, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_3, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_4, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_5, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_6, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_7, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_8, 4, 9 },
|
||||
|
||||
deconv_test_params{ CASE_DECONV_FP16_1, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_2, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_3, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_4, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_5, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_6, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_7, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_8, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_1, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_2, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_3, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_4, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_5, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_6, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_7, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_8, 4, 9 },
|
||||
|
||||
deconv_test_params{ CASE_DECONV_U8S8_1, 2, 9 },
|
||||
deconv_test_params{ CASE_DECONV_U8S8_2, 2, 9 },
|
||||
@ -4350,24 +4350,24 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_actv_quant_u8_eltw_scale_actv_
|
||||
deconv_test_params{ CASE_DECONV_S8S8_7, 2, 9 },
|
||||
deconv_test_params{ CASE_DECONV_S8S8_8, 2, 9 },
|
||||
|
||||
deconv_test_params{ CASE_DECONV_FP32_3D_1, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_3D_2, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_3D_3, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_3D_4, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_3D_5, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_3D_6, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_3D_7, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_3D_8, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_3D_1, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_3D_2, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_3D_3, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_3D_4, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_3D_5, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_3D_6, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_3D_7, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP32_3D_8, 4, 9 },
|
||||
// deconv_test_params{ CASE_DECONV_FP32_3D_9, 6, 9 },
|
||||
|
||||
deconv_test_params{ CASE_DECONV_FP16_3D_1, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_3D_2, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_3D_3, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_3D_4, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_3D_5, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_3D_6, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_3D_7, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_3D_8, 6, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_3D_1, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_3D_2, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_3D_3, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_3D_4, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_3D_5, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_3D_6, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_3D_7, 4, 9 },
|
||||
deconv_test_params{ CASE_DECONV_FP16_3D_8, 4, 9 },
|
||||
// deconv_test_params{ CASE_DECONV_FP16_3D_9, 6, 9 },
|
||||
|
||||
deconv_test_params{ CASE_DECONV_U8S8_3D_1, 2, 9 },
|
||||
@ -6224,36 +6224,37 @@ struct eltwise_test_params {
|
||||
size_t expected_not_fused_primitives;
|
||||
};
|
||||
|
||||
#define CASE_ELTWISE_FP32_1 {2, 16, 4, 4}, data_types::f32, data_types::f32, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_FP32_2 {2, 16, 4, 4}, data_types::f32, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_FP32_3 {2, 16, 4, 4}, data_types::f32, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_FP16_1 {2, 16, 4, 4}, data_types::f16, data_types::f16, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_FP16_2 {2, 16, 4, 4}, data_types::f16, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_FP16_3 {2, 16, 4, 4}, data_types::f16, data_types::f16, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_I8_1 {2, 16, 4, 4}, data_types::i8, data_types::i8, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_I8_2 {2, 16, 4, 4}, data_types::i8, data_types::i8, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_I8_3 {2, 16, 4, 4}, data_types::i8, data_types::i8, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_U8_1 {2, 16, 4, 4}, data_types::u8, data_types::u8, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_U8_2 {2, 16, 4, 4}, data_types::u8, data_types::u8, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_U8_3 {2, 16, 4, 4}, data_types::u8, data_types::u8, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_FP32_FP16_1 {2, 16, 4, 4}, data_types::f32, data_types::f16, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_FP32_FP16_2 {2, 16, 4, 4}, data_types::f32, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_FP32_FP16_3 {2, 16, 4, 4}, data_types::f32, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_FP16_FP32_1 {2, 16, 4, 4}, data_types::f16, data_types::f32, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_FP16_FP32_2 {2, 16, 4, 4}, data_types::f16, data_types::f32, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_FP16_FP32_3 {2, 16, 4, 4}, data_types::f16, data_types::f32, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_I8_FP16_1 {2, 16, 4, 4}, data_types::i8, data_types::f16, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_I8_FP16_2 {2, 16, 4, 4}, data_types::i8, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_I8_FP16_3 {2, 16, 4, 4}, data_types::i8, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_I8_FP32_1 {2, 16, 4, 4}, data_types::i8, data_types::f32, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_I8_FP32_2 {2, 16, 4, 4}, data_types::i8, data_types::f32, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_I8_FP32_3 {2, 16, 4, 4}, data_types::i8, data_types::f32, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_U8_FP16_1 {2, 16, 4, 4}, data_types::u8, data_types::f16, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_U8_FP16_2 {2, 16, 4, 4}, data_types::u8, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_U8_FP16_3 {2, 16, 4, 4}, data_types::u8, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_U8_FP32_1 {2, 16, 4, 4}, data_types::u8, data_types::f32, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_U8_FP32_2 {2, 16, 4, 4}, data_types::u8, data_types::f32, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_U8_FP32_3 {2, 16, 4, 4}, data_types::u8, data_types::f32, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
|
||||
#define CASE_ELTWISE_FP32_1 {2, 16, 4, 4}, data_types::f32, data_types::f32, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_FP32_2 {2, 16, 4, 4}, data_types::f32, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_FP32_3 {2, 16, 4, 4}, data_types::f32, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_FP32_4 {2, 16, 4, 4}, data_types::f32, data_types::f32, format::bfwzyx, data_types::f32, format::bfwzyx, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_FP16_1 {2, 16, 4, 4}, data_types::f16, data_types::f16, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_FP16_2 {2, 16, 4, 4}, data_types::f16, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_FP16_3 {2, 16, 4, 4}, data_types::f16, data_types::f16, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_I8_1 {2, 16, 4, 4}, data_types::i8, data_types::i8, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_I8_2 {2, 16, 4, 4}, data_types::i8, data_types::i8, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_I8_3 {2, 16, 4, 4}, data_types::i8, data_types::i8, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_U8_1 {2, 16, 4, 4}, data_types::u8, data_types::u8, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_U8_2 {2, 16, 4, 4}, data_types::u8, data_types::u8, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_U8_3 {2, 16, 4, 4}, data_types::u8, data_types::u8, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_FP32_FP16_1 {2, 16, 4, 4}, data_types::f32, data_types::f16, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_FP32_FP16_2 {2, 16, 4, 4}, data_types::f32, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_FP32_FP16_3 {2, 16, 4, 4}, data_types::f32, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_FP16_FP32_1 {2, 16, 4, 4}, data_types::f16, data_types::f32, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_FP16_FP32_2 {2, 16, 4, 4}, data_types::f16, data_types::f32, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_FP16_FP32_3 {2, 16, 4, 4}, data_types::f16, data_types::f32, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_I8_FP16_1 {2, 16, 4, 4}, data_types::i8, data_types::f16, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_I8_FP16_2 {2, 16, 4, 4}, data_types::i8, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_I8_FP16_3 {2, 16, 4, 4}, data_types::i8, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_I8_FP32_1 {2, 16, 4, 4}, data_types::i8, data_types::f32, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_I8_FP32_2 {2, 16, 4, 4}, data_types::i8, data_types::f32, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_I8_FP32_3 {2, 16, 4, 4}, data_types::i8, data_types::f32, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_U8_FP16_1 {2, 16, 4, 4}, data_types::u8, data_types::f16, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_U8_FP16_2 {2, 16, 4, 4}, data_types::u8, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_U8_FP16_3 {2, 16, 4, 4}, data_types::u8, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_U8_FP32_1 {2, 16, 4, 4}, data_types::u8, data_types::f32, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_U8_FP32_2 {2, 16, 4, 4}, data_types::u8, data_types::f32, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum
|
||||
#define CASE_ELTWISE_U8_FP32_3 {2, 16, 4, 4}, data_types::u8, data_types::f32, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum
|
||||
|
||||
|
||||
class EltwiseFusingTest : public ::BaseFusingTest<eltwise_test_params> {
|
||||
@ -6317,30 +6318,234 @@ TEST_P(eltwise_quantize, i8_per_channel) {
|
||||
INSTANTIATE_TEST_CASE_P(fusings_gpu,
|
||||
eltwise_quantize,
|
||||
::testing::ValuesIn(std::vector<eltwise_test_params>{
|
||||
eltwise_test_params{CASE_ELTWISE_FP16_1},
|
||||
eltwise_test_params{CASE_ELTWISE_FP16_2},
|
||||
eltwise_test_params{CASE_ELTWISE_FP16_3},
|
||||
eltwise_test_params{CASE_ELTWISE_FP32_1},
|
||||
eltwise_test_params{CASE_ELTWISE_FP32_2},
|
||||
eltwise_test_params{CASE_ELTWISE_FP32_3},
|
||||
eltwise_test_params{CASE_ELTWISE_FP32_FP16_1},
|
||||
eltwise_test_params{CASE_ELTWISE_FP32_FP16_2},
|
||||
eltwise_test_params{CASE_ELTWISE_FP32_FP16_3},
|
||||
eltwise_test_params{CASE_ELTWISE_FP16_FP32_1},
|
||||
eltwise_test_params{CASE_ELTWISE_FP16_FP32_2},
|
||||
eltwise_test_params{CASE_ELTWISE_FP16_FP32_3},
|
||||
eltwise_test_params{CASE_ELTWISE_I8_FP32_1},
|
||||
eltwise_test_params{CASE_ELTWISE_I8_FP32_2},
|
||||
eltwise_test_params{CASE_ELTWISE_I8_FP32_3},
|
||||
eltwise_test_params{CASE_ELTWISE_U8_FP32_1},
|
||||
eltwise_test_params{CASE_ELTWISE_U8_FP32_2},
|
||||
eltwise_test_params{CASE_ELTWISE_U8_FP32_3},
|
||||
eltwise_test_params{CASE_ELTWISE_I8_FP16_1},
|
||||
eltwise_test_params{CASE_ELTWISE_I8_FP16_2},
|
||||
eltwise_test_params{CASE_ELTWISE_I8_FP16_3},
|
||||
eltwise_test_params{CASE_ELTWISE_U8_FP16_1},
|
||||
eltwise_test_params{CASE_ELTWISE_U8_FP16_2},
|
||||
eltwise_test_params{CASE_ELTWISE_U8_FP16_3},
|
||||
eltwise_test_params{CASE_ELTWISE_FP16_1, 3, 4},
|
||||
eltwise_test_params{CASE_ELTWISE_FP16_2, 3, 4},
|
||||
eltwise_test_params{CASE_ELTWISE_FP16_3, 3, 4},
|
||||
eltwise_test_params{CASE_ELTWISE_FP32_1, 3, 4},
|
||||
eltwise_test_params{CASE_ELTWISE_FP32_2, 3, 4},
|
||||
eltwise_test_params{CASE_ELTWISE_FP32_3, 3, 4},
|
||||
eltwise_test_params{CASE_ELTWISE_FP32_FP16_1, 3, 4},
|
||||
eltwise_test_params{CASE_ELTWISE_FP32_FP16_2, 3, 4},
|
||||
eltwise_test_params{CASE_ELTWISE_FP32_FP16_3, 3, 4},
|
||||
eltwise_test_params{CASE_ELTWISE_FP16_FP32_1, 3, 4},
|
||||
eltwise_test_params{CASE_ELTWISE_FP16_FP32_2, 3, 4},
|
||||
eltwise_test_params{CASE_ELTWISE_FP16_FP32_3, 3, 4},
|
||||
eltwise_test_params{CASE_ELTWISE_I8_FP32_1, 3, 4},
|
||||
eltwise_test_params{CASE_ELTWISE_I8_FP32_2, 3, 4},
|
||||
eltwise_test_params{CASE_ELTWISE_I8_FP32_3, 3, 4},
|
||||
eltwise_test_params{CASE_ELTWISE_U8_FP32_1, 3, 4},
|
||||
eltwise_test_params{CASE_ELTWISE_U8_FP32_2, 3, 4},
|
||||
eltwise_test_params{CASE_ELTWISE_U8_FP32_3, 3, 4},
|
||||
eltwise_test_params{CASE_ELTWISE_I8_FP16_1, 3, 4},
|
||||
eltwise_test_params{CASE_ELTWISE_I8_FP16_2, 3, 4},
|
||||
eltwise_test_params{CASE_ELTWISE_I8_FP16_3, 3, 4},
|
||||
eltwise_test_params{CASE_ELTWISE_U8_FP16_1, 3, 4},
|
||||
eltwise_test_params{CASE_ELTWISE_U8_FP16_2, 3, 4},
|
||||
eltwise_test_params{CASE_ELTWISE_U8_FP16_3, 3, 4},
|
||||
}), );
|
||||
|
||||
class eltwise_fp32_fused_prims : public EltwiseFusingTest {};
|
||||
TEST_P(eltwise_fp32_fused_prims, scale_activation) {
|
||||
auto p = GetParam();
|
||||
create_topologies(input_layout("input", get_input_layout(p)),
|
||||
input_layout("input2", get_input_layout2(p)),
|
||||
data("scale_data", get_mem(get_per_channel_layout(p), -10, 10)),
|
||||
eltwise("eltwise", {"input", "input2"}, p.mode, p.default_type),
|
||||
scale("scale", "eltwise", "scale_data"),
|
||||
activation("activation", "scale", activation_func::abs),
|
||||
reorder("out", "activation", p.default_format, data_types::f32));
|
||||
|
||||
tolerance = 1e-5f;
|
||||
execute(p);
|
||||
}
|
||||
|
||||
TEST_P(eltwise_fp32_fused_prims, eltwise_activation) {
|
||||
auto p = GetParam();
|
||||
create_topologies(input_layout("input", get_input_layout(p)),
|
||||
input_layout("input2", get_input_layout2(p)),
|
||||
data("eltwise_data", get_mem(get_input_layout2(p), -10, 10)),
|
||||
eltwise("eltwise1", {"input", "input2"}, p.mode, p.default_type),
|
||||
eltwise("eltwise2", {"eltwise1", "eltwise_data"}, eltwise_mode::prod, p.default_type),
|
||||
activation("activation", "eltwise2", activation_func::abs),
|
||||
reorder("out", "activation", p.default_format, data_types::f32));
|
||||
|
||||
tolerance = 1e-5f;
|
||||
execute(p);
|
||||
}
|
||||
|
||||
// Same chain as the eltwise_activation test (eltwise1 -> eltwise2 in prod mode -> abs -> reorder),
// except that "eltwise_data" is built with get_per_channel_layout(p) instead of get_input_layout2(p).
// NOTE(review): the test name suggests the second eltwise has to broadcast that operand -- the
// per-channel layout helper of EltwiseFusingTest is outside this chunk, so confirm its shape there.
TEST_P(eltwise_fp32_fused_prims, eltwise_activation_with_broadcast) {
|
||||
auto p = GetParam();
|
||||
create_topologies(input_layout("input", get_input_layout(p)),
|
||||
input_layout("input2", get_input_layout2(p)),
|
||||
data("eltwise_data", get_mem(get_per_channel_layout(p), -10, 10)),
|
||||
eltwise("eltwise1", {"input", "input2"}, p.mode, p.default_type),
|
||||
eltwise("eltwise2", {"eltwise1", "eltwise_data"}, eltwise_mode::prod, p.default_type),
|
||||
activation("activation", "eltwise2", activation_func::abs),
|
||||
reorder("out", "activation", p.default_format, data_types::f32));
|
||||
|
||||
tolerance = 1e-5f;
|
||||
execute(p);
|
||||
}
|
||||
|
||||
// Parameter list shared by every eltwise_fp32_fused_prims test above.
// Each entry is a CASE_ELTWISE_* macro followed by two counts (3 and 5).
// NOTE(review): eltwise_test_params is declared outside this chunk; by analogy with
// scale_test_params the two counts are presumably expected_fused_primitives and
// expected_not_fused_primitives -- confirm against the struct.
INSTANTIATE_TEST_CASE_P(fusings_gpu,
|
||||
eltwise_fp32_fused_prims,
|
||||
::testing::ValuesIn(std::vector<eltwise_test_params>{
|
||||
eltwise_test_params{CASE_ELTWISE_FP16_1, 3, 5},
|
||||
eltwise_test_params{CASE_ELTWISE_FP16_2, 3, 5},
|
||||
eltwise_test_params{CASE_ELTWISE_FP16_3, 3, 5},
|
||||
eltwise_test_params{CASE_ELTWISE_FP32_1, 3, 5},
|
||||
eltwise_test_params{CASE_ELTWISE_FP32_2, 3, 5},
|
||||
eltwise_test_params{CASE_ELTWISE_FP32_3, 3, 5},
|
||||
eltwise_test_params{CASE_ELTWISE_FP32_FP16_1, 3, 5},
|
||||
eltwise_test_params{CASE_ELTWISE_FP32_FP16_2, 3, 5},
|
||||
eltwise_test_params{CASE_ELTWISE_FP32_FP16_3, 3, 5},
|
||||
eltwise_test_params{CASE_ELTWISE_FP16_FP32_1, 3, 5},
|
||||
eltwise_test_params{CASE_ELTWISE_FP16_FP32_2, 3, 5},
|
||||
eltwise_test_params{CASE_ELTWISE_FP16_FP32_3, 3, 5},
|
||||
eltwise_test_params{CASE_ELTWISE_I8_FP32_1, 3, 5},
|
||||
eltwise_test_params{CASE_ELTWISE_I8_FP32_2, 3, 5},
|
||||
eltwise_test_params{CASE_ELTWISE_I8_FP32_3, 3, 5},
|
||||
eltwise_test_params{CASE_ELTWISE_U8_FP32_1, 3, 5},
|
||||
eltwise_test_params{CASE_ELTWISE_U8_FP32_2, 3, 5},
|
||||
eltwise_test_params{CASE_ELTWISE_U8_FP32_3, 3, 5},
|
||||
eltwise_test_params{CASE_ELTWISE_I8_FP16_1, 3, 5},
|
||||
eltwise_test_params{CASE_ELTWISE_I8_FP16_2, 3, 5},
|
||||
eltwise_test_params{CASE_ELTWISE_I8_FP16_3, 3, 5},
|
||||
eltwise_test_params{CASE_ELTWISE_U8_FP16_1, 3, 5},
|
||||
eltwise_test_params{CASE_ELTWISE_U8_FP16_2, 3, 5},
|
||||
eltwise_test_params{CASE_ELTWISE_U8_FP16_3, 3, 5},
|
||||
}), );
|
||||
|
||||
// Fixture for the eltwise + scale test below; adds nothing on top of EltwiseFusingTest.
class eltwise_fp32_scale : public EltwiseFusingTest {};
|
||||
// Topology under test: eltwise(input, input2) in mode p.mode -> scale by "scale_data" -> reorder to f32.
// NOTE(review): the "6d" name presumably refers to the tensor rank of CASE_ELTWISE_FP32_4, the only
// case this test is instantiated with; that macro is defined outside this chunk -- confirm.
TEST_P(eltwise_fp32_scale, 6d) {
|
||||
auto p = GetParam();
|
||||
create_topologies(input_layout("input", get_input_layout(p)),
|
||||
input_layout("input2", get_input_layout2(p)),
|
||||
data("scale_data", get_mem(get_per_channel_layout(p), -10, 10)),
|
||||
eltwise("eltwise", {"input", "input2"}, p.mode, p.default_type),
|
||||
scale("scale", "eltwise", "scale_data"),
|
||||
reorder("out", "scale", p.default_format, data_types::f32));
|
||||
|
||||
tolerance = 1e-5f;
|
||||
execute(p);
|
||||
}
|
||||
|
||||
// eltwise_fp32_scale runs against a single case, CASE_ELTWISE_FP32_4, with trailing counts 3 and 4.
// NOTE(review): presumably expected fused / not-fused primitive counts -- eltwise_test_params is
// declared outside this chunk.
INSTANTIATE_TEST_CASE_P(fusings_gpu,
|
||||
eltwise_fp32_scale,
|
||||
::testing::ValuesIn(std::vector<eltwise_test_params>{
|
||||
eltwise_test_params{CASE_ELTWISE_FP32_4, 3, 4},
|
||||
}), );
|
||||
|
||||
/* ----------------------------------------------------------------------------------------------------- */
|
||||
/* ---------------------------------------- Scale cases ------------------------------------------------ */
|
||||
/* ----------------------------------------------------------------------------------------------------- */
|
||||
// Parameters of one scale fusing test case.
// The struct is aggregate-initialized (see the CASE_SCALE_* macros and the scale_basic
// instantiation), so the order of the fields must not change.
struct scale_test_params {
|
||||
// Size, element type and format used to build the "input" layout (see get_input_layout).
tensor input_size;
|
||||
data_types input_type;
|
||||
format input_format;
|
||||
// Element type and format used for the per-channel constant data and for the final reorder.
data_types default_type;
|
||||
format default_format;
|
||||
// NOTE(review): presumably the primitive counts expected in the fused and in the non-fused
// network; the check itself lives in compare(), which is outside this chunk.
size_t expected_fused_primitives;
|
||||
size_t expected_not_fused_primitives;
|
||||
};
|
||||
|
||||
// Scale uses the same kernel as the eltwise primitive, so the kernel itself is already well covered by the eltwise tests above.
|
||||
// Here we only need to check that the fused scale kernel is constructed correctly (inputs are set correctly, the fused precision is propagated, etc.)
|
||||
// and that the fusing conditions in the graph are correct.
// Each CASE_SCALE_* macro expands to the first five scale_test_params fields, in order:
// input size, input type, input format, default type, default format.
|
||||
#define CASE_SCALE_FP32_1 {2, 16, 4, 4}, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_SCALE_FP32_2 {2, 16, 4, 4}, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_SCALE_FP32_3 {2, 16, 4, 4}, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16
|
||||
|
||||
// Common fixture for the scale fusing tests: builds the fused and the non-fused network from the
// topologies prepared by the test body, feeds both the same input and hands them to compare().
// engine, topology_fused, topology_non_fused, bo_fused, bo_not_fused, get_mem() and compare()
// are not defined here -- presumably inherited from BaseFusingTest (outside this chunk).
class ScaleFusingTest : public ::BaseFusingTest<scale_test_params> {
|
||||
public:
|
||||
// Runs one test case: the same input memory is bound to "input" in both networks.
void execute(scale_test_params& p) {
|
||||
auto input_prim = get_mem(get_input_layout(p));
|
||||
|
||||
network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused);
|
||||
network network_fused(this->engine, this->topology_fused, bo_fused);
|
||||
|
||||
network_fused.set_input_data("input", input_prim);
|
||||
network_not_fused.set_input_data("input", input_prim);
|
||||
|
||||
compare(network_not_fused, network_fused, p);
|
||||
}
|
||||
|
||||
// Layout of the network input, taken verbatim from the test parameters.
layout get_input_layout(scale_test_params& p) { return layout{p.input_type, p.input_format, p.input_size}; }
|
||||
|
||||
// Layout with one element per input feature: tensor{1, <feature count of input_size>, 1, 1}
// in the default type and format.
layout get_per_channel_layout(scale_test_params& p) {
|
||||
return layout{p.default_type, p.default_format, tensor{1, p.input_size.feature[0], 1, 1}};
|
||||
}
|
||||
};
|
||||
|
||||
// Fixture for the basic scale fusing tests below; adds nothing on top of ScaleFusingTest.
class scale_basic : public ScaleFusingTest {};
|
||||
// Topology under test: scale(input, scale_data) without bias -> negative activation ->
// eltwise product with per-channel "eltwise_data" -> reorder to f32 in p.default_format.
TEST_P(scale_basic, no_bias_act_eltwise) {
|
||||
auto p = GetParam();
|
||||
create_topologies(input_layout("input", get_input_layout(p)),
|
||||
data("scale_data", get_mem(get_per_channel_layout(p), -10, 10)),
|
||||
scale("scale", "input", "scale_data"),
|
||||
activation("activation", "scale", activation_func::negative),
|
||||
data("eltwise_data", get_mem(get_per_channel_layout(p), -10, 10)),
|
||||
eltwise("eltwise", {"activation", "eltwise_data"}, eltwise_mode::prod, p.default_type),
|
||||
reorder("out", "eltwise", p.default_format, data_types::f32));
|
||||
|
||||
tolerance = 1e-5f;
|
||||
execute(p);
|
||||
}
|
||||
|
||||
// Topology under test: scale(input, scale_data, bias_data) -> negative activation ->
// eltwise product with per-channel "eltwise_data" -> reorder to f32 in p.default_format.
// Differs from no_bias_act_eltwise only by the extra "bias_data" operand of the scale primitive.
TEST_P(scale_basic, bias_act_eltwise) {
|
||||
auto p = GetParam();
|
||||
create_topologies(input_layout("input", get_input_layout(p)),
|
||||
data("scale_data", get_mem(get_per_channel_layout(p), -10, 10)),
|
||||
data("bias_data", get_mem(get_per_channel_layout(p), -10, 10)),
|
||||
scale("scale", "input", "scale_data", "bias_data"),
|
||||
activation("activation", "scale", activation_func::negative),
|
||||
data("eltwise_data", get_mem(get_per_channel_layout(p), -10, 10)),
|
||||
eltwise("eltwise", {"activation", "eltwise_data"}, eltwise_mode::prod, p.default_type),
|
||||
reorder("out", "eltwise", p.default_format, data_types::f32));
|
||||
|
||||
tolerance = 1e-5f;
|
||||
execute(p);
|
||||
}
|
||||
|
||||
// Topology under test: scale(input, scale_data, bias_data) -> negative activation ->
// second scale by per-channel "scale_data2" (no bias) -> reorder to f32 in p.default_format.
TEST_P(scale_basic, bias_act_scale) {
|
||||
auto p = GetParam();
|
||||
create_topologies(input_layout("input", get_input_layout(p)),
|
||||
data("scale_data", get_mem(get_per_channel_layout(p), -10, 10)),
|
||||
data("bias_data", get_mem(get_per_channel_layout(p), -10, 10)),
|
||||
scale("scale", "input", "scale_data", "bias_data"),
|
||||
activation("activation", "scale", activation_func::negative),
|
||||
data("scale_data2", get_mem(get_per_channel_layout(p), -10, 10)),
|
||||
scale("scale2", "activation", "scale_data2"),
|
||||
reorder("out", "scale2", p.default_format, data_types::f32));
|
||||
|
||||
tolerance = 1e-5f;
|
||||
execute(p);
|
||||
}
|
||||
|
||||
// Topology under test: scale(input, scale_data, bias_data) -> negative activation ->
// quantize to i8 (per-channel in_lo/in_hi, single-element out_lo = -128 / out_hi = 127) -> reorder to f32.
// min_random, max_random and get_single_element_layout() are not defined in ScaleFusingTest --
// presumably inherited from BaseFusingTest (outside this chunk).
TEST_P(scale_basic, bias_act_quantize) {
|
||||
auto p = GetParam();
|
||||
create_topologies(input_layout("input", get_input_layout(p)),
|
||||
data("scale_data", get_mem(get_per_channel_layout(p), -10, 10)),
|
||||
data("bias_data", get_mem(get_per_channel_layout(p), -10, 10)),
|
||||
scale("scale", "input", "scale_data", "bias_data"),
|
||||
activation("activation", "scale", activation_func::negative),
|
||||
data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
|
||||
data("in_hi", get_mem(get_per_channel_layout(p), 1, max_random)),
|
||||
data("out_lo", get_mem(get_single_element_layout(p), -128)),
|
||||
data("out_hi", get_mem(get_single_element_layout(p), 127)),
|
||||
quantize("quantize", "activation", "in_lo", "in_hi", "out_lo", "out_hi", 256, data_types::i8),
|
||||
reorder("out", "quantize", p.default_format, data_types::f32));
|
||||
|
||||
// Much looser than the 1e-5f used by the other scale_basic tests.
// NOTE(review): presumably allows an off-by-one difference in the i8 quantized output -- confirm.
tolerance = 1.f;
|
||||
execute(p);
|
||||
}
|
||||
|
||||
// Runs every scale_basic test against the three CASE_SCALE_FP32_* configurations.
// The trailing 2 and 4 fill expected_fused_primitives and expected_not_fused_primitives.
INSTANTIATE_TEST_CASE_P(fusings_gpu,
|
||||
scale_basic,
|
||||
::testing::ValuesIn(std::vector<scale_test_params>{
|
||||
scale_test_params{CASE_SCALE_FP32_1, 2, 4},
|
||||
scale_test_params{CASE_SCALE_FP32_2, 2, 4},
|
||||
scale_test_params{CASE_SCALE_FP32_3, 2, 4},
|
||||
}), );
|
||||
|
||||
// Fixture for the eltwise + quantize tests; adds nothing on top of EltwiseFusingTest.
class eltwise_no_pitches_same_dims_quantize : public EltwiseFusingTest {};
|
||||
@ -6363,12 +6568,12 @@ TEST_P(eltwise_no_pitches_same_dims_quantize, quantize_f32_output) {
|
||||
// Parameter list for the eltwise_no_pitches_same_dims_quantize tests.
// Every case carries explicit trailing counts (3 and 4). The entries that omitted them have been
// dropped: they duplicated the cases below and left the two trailing fields value-initialized to 0.
// NOTE(review): eltwise_test_params is declared outside this chunk; the counts are presumably
// expected_fused_primitives / expected_not_fused_primitives -- confirm against the struct.
INSTANTIATE_TEST_CASE_P(fusings_gpu,
                        eltwise_no_pitches_same_dims_quantize,
                        ::testing::ValuesIn(std::vector<eltwise_test_params>{
                            eltwise_test_params{CASE_ELTWISE_FP16_1, 3, 4},
                            eltwise_test_params{CASE_ELTWISE_FP16_2, 3, 4},
                            eltwise_test_params{CASE_ELTWISE_FP16_3, 3, 4},
                            eltwise_test_params{CASE_ELTWISE_FP32_1, 3, 4},
                            eltwise_test_params{CASE_ELTWISE_FP32_2, 3, 4},
                            eltwise_test_params{CASE_ELTWISE_FP32_3, 3, 4},
                        }), );
|
||||
|
||||
// Fixture for the eltwise + activation tests; adds nothing on top of EltwiseFusingTest.
class eltwise_activation : public EltwiseFusingTest {};
|
||||
@ -6399,18 +6604,18 @@ TEST_P(eltwise_activation, fp16_out) {
|
||||
// Parameter list for the eltwise_activation tests.
// Every case carries explicit trailing counts (3 and 4). The entries that omitted them have been
// dropped: they duplicated the cases below, and the last of them had no separating comma before
// the next entry, which made the initializer list ill-formed.
// NOTE(review): eltwise_test_params is declared outside this chunk; the counts are presumably
// expected_fused_primitives / expected_not_fused_primitives -- confirm against the struct.
INSTANTIATE_TEST_CASE_P(fusings_gpu,
                        eltwise_activation,
                        ::testing::ValuesIn(std::vector<eltwise_test_params>{
                            eltwise_test_params{CASE_ELTWISE_FP16_1, 3, 4},
                            eltwise_test_params{CASE_ELTWISE_FP16_2, 3, 4},
                            eltwise_test_params{CASE_ELTWISE_FP16_3, 3, 4},
                            eltwise_test_params{CASE_ELTWISE_FP32_1, 3, 4},
                            eltwise_test_params{CASE_ELTWISE_FP32_2, 3, 4},
                            eltwise_test_params{CASE_ELTWISE_FP32_3, 3, 4},
                            eltwise_test_params{CASE_ELTWISE_FP32_FP16_1, 3, 4},
                            eltwise_test_params{CASE_ELTWISE_FP32_FP16_2, 3, 4},
                            eltwise_test_params{CASE_ELTWISE_FP32_FP16_3, 3, 4},
                            eltwise_test_params{CASE_ELTWISE_FP16_FP32_1, 3, 4},
                            eltwise_test_params{CASE_ELTWISE_FP16_FP32_2, 3, 4},
                            eltwise_test_params{CASE_ELTWISE_FP16_FP32_3, 3, 4}
                        }), );
|
||||
|
||||
/* ----------------------------------------------------------------------------------------------------- */
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user