Merge remote-tracking branch 'upstream/master'

Steve Yoo
2021-06-08 09:49:38 +09:00
234 changed files with 7665 additions and 1825 deletions

View File

@@ -82,9 +82,10 @@ jobs:
- script: |
sudo apt --assume-yes install libusb-1.0-0-dev
# For opencv-python: setuptools and upgrade
sudo apt-get install python3-setuptools
sudo apt-get install python3-setuptools patchelf
python3 -m pip install --upgrade pip
python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/requirements.txt
python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/wheel/requirements-dev.txt
# For running Python API tests
python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/src/requirements-dev.txt
# Speed up build
@@ -106,6 +107,7 @@ jobs:
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE)
-DENABLE_PYTHON=ON
-DPYTHON_EXECUTABLE=/usr/bin/python3.6
-DENABLE_WHEEL=ON
-DENABLE_TESTS=ON
-DNGRAPH_ONNX_IMPORT_ENABLE=ON
-DNGRAPH_ONNX_EDITOR_ENABLE=ON

View File

@@ -94,7 +94,6 @@ jobs:
-DENABLE_PROFILING_ITT=OFF
-DENABLE_SAMPLES=OFF
-DENABLE_SPEECH_DEMO=OFF
-DENABLE_PYTHON=ON
-DNGRAPH_ONNX_IMPORT_ENABLE=ON
-DNGRAPH_ONNX_EDITOR_ENABLE=ON
-DNGRAPH_INTERPRETER_ENABLE=ON

View File

@@ -1,3 +0,0 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

View File

@@ -5,12 +5,13 @@
Check GitHub organization and invite members
"""
# pylint: disable=fixme,no-member
# pylint: disable=fixme,no-member,too-many-locals
from argparse import ArgumentParser
import github_api
from configs import Config
from github_api import GithubOrgApi, get_dev_emails
from ldap_api import LdapApi, print_user_info, InfoLevel
def main():
@@ -19,32 +20,74 @@ def main():
arg_parser.add_argument("--cfg-file", metavar="PATH", default=Config.default_cfg_path,
help=f"Path to json configuration file, e.g. {Config.default_cfg_path}")
arg_parser.add_argument("--teams", action="store_true", help="Check GitHub teams")
arg_parser.add_argument("--no-ldap", action="store_true", help="Don't use LDAP info")
args, unknown_args = arg_parser.parse_known_args()
Config(args.cfg_file, unknown_args)
gh_api = github_api.GithubOrgApi()
gh_api = GithubOrgApi()
if args.teams:
gh_api.get_org_teams()
else:
dev_emails = github_api.get_dev_emails()
print(f'\nDeveloper emails {len(dev_emails)}:', '; '.join(dev_emails))
return
org_emails = gh_api.get_org_emails()
print(f'\nOrg emails {len(org_emails)}:', '; '.join(org_emails))
cfg_emails = get_dev_emails()
print(f'\nCfg developer emails {len(cfg_emails)}:', '; '.join(sorted(cfg_emails)))
org_pendig_invitation_emails = gh_api.get_org_invitation_emails()
dev_emails = set()
dev_emails.update(cfg_emails)
invite_emails = dev_emails.difference(org_emails).difference(org_pendig_invitation_emails)
print(f'\nInvite emails {len(invite_emails)}:', '; '.join(invite_emails))
if not args.no_ldap:
ldap_api = LdapApi()
ldap_emails = ldap_api.get_user_emails()
dev_emails.update(ldap_emails)
print(f'\nLDAP developer emails {len(ldap_emails)}:', '; '.join(sorted(ldap_emails)))
no_in_dev_emails = org_emails.difference(dev_emails)
print(f'\nOrg members - no in developers list {len(no_in_dev_emails)}:',
'; '.join(no_in_dev_emails))
cfg_emails_no_in_ldap = ldap_api.get_absent_emails(cfg_emails)
print(f'\nCfg developer emails - absent in LDAP at all {len(cfg_emails_no_in_ldap)}:',
'; '.join(sorted(cfg_emails_no_in_ldap)))
valid_github_users = gh_api.get_valid_github_users(invite_emails)
cfg_ldap_inters = cfg_emails.intersection(ldap_emails)
print(f'\nCfg developer emails - present in LDAP developers {len(cfg_ldap_inters)}:',
'; '.join(sorted(cfg_ldap_inters)))
gh_api.invite_users(valid_github_users)
org_emails, org_logins_no_intel_email = gh_api.get_org_emails()
print(f'\nOrg emails {len(org_emails)}:', '; '.join(sorted(org_emails)))
org_emails_no_in_ldap = set()
if not args.no_ldap:
org_ldap_diff = org_emails.difference(ldap_emails)
print(f'\nOrg member emails - absent in LDAP developers {len(org_ldap_diff)}:',
'; '.join(sorted(org_ldap_diff)))
for email in org_ldap_diff:
user_info = ldap_api.get_user_info_by_email(email)
if user_info:
print_user_info(user_info, InfoLevel.PDL)
else:
org_emails_no_in_ldap.add(email)
org_pendig_invitation_emails = gh_api.get_org_invitation_emails()
invite_emails = dev_emails.difference(org_emails).difference(org_pendig_invitation_emails)
print(f'\nInvite emails {len(invite_emails)}:', '; '.join(sorted(invite_emails)))
valid_github_users = gh_api.get_valid_github_users(invite_emails)
gh_api.invite_users(valid_github_users)
print('\nCheck accounts below and remove from the GitHub organization and cfg list')
cfg_emails_no_in_org = sorted(cfg_emails.difference(org_emails))
print(f'\nCfg developer emails - absent in GitHub organization {len(cfg_emails_no_in_org)}:',
'; '.join(cfg_emails_no_in_org))
org_emails_no_in_dev = sorted(org_emails.difference(dev_emails))
print(f'\nOrg member emails - absent in cfg and LDAP developers {len(org_emails_no_in_dev)}:',
'; '.join(org_emails_no_in_dev))
print(f'\nOrg member emails - absent in LDAP at all {len(org_emails_no_in_ldap)}:',
'; '.join(sorted(org_emails_no_in_ldap)))
print(f'\nOrg member logins - absent Intel email {len(org_logins_no_intel_email)}:',
'; '.join(sorted(org_logins_no_intel_email)))
if __name__ == '__main__':
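The new invite flow above is plain set arithmetic over the cfg, LDAP, and GitHub email sets. A minimal sketch of that logic, with invented addresses (not part of the diff):

```python
cfg_emails = {"a@intel.com", "b@intel.com", "c@intel.com"}   # from the json cfg
ldap_emails = {"b@intel.com", "d@intel.com"}                 # from LdapApi.get_user_emails()
org_emails = {"a@intel.com", "d@intel.com"}                  # current GitHub org members
pending = {"c@intel.com"}                                    # already-sent invitations

dev_emails = cfg_emails | ldap_emails            # union of both developer sources
invite = dev_emails - org_emails - pending       # not a member and not invited yet
print(sorted(invite))                            # ['b@intel.com']
```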

View File

@@ -8,6 +8,7 @@ Check GitHub PRs and set labels by type and categories, e.g. 'ExternalPR', 'cate
# pylint: disable=fixme,no-member
import re
import sys
import datetime
from argparse import ArgumentParser
from enum import Enum
@@ -18,10 +19,11 @@ from configs import Config
class PrType(Enum):
"""Constants for type of GitHub pull request by author membership"""
EXTERNAL = 'ExternalPR'
INTEL = 'ExternalIntelPR'
ORG = 'OpenvinoPR'
BAD = 'BadPR'
EXTERNAL = "ExternalPR"
INTEL = "ExternalIntelPR"
ORG = "OpenvinoPR"
BAD = "BadPR"
def get_pr_labels(pull):
@@ -33,13 +35,23 @@ def get_pr_labels(pull):
def set_pr_labels(pull, labels):
"""Sets PR labels"""
"""Sets new PR labels (all previously set labels are removed)"""
if not labels or Config().DRY_RUN:
return
print(f'Set PR labels:', labels)
print("Set PR labels:", labels)
# set_labels() should accept a list but fails with an empty "AssertionError:"
pull.set_labels(labels)
def add_pr_labels(pull, labels):
"""Adds PR labels"""
if not labels or Config().DRY_RUN:
return
print("Add PR labels:", labels)
for label in labels:
pull.add_to_labels(label)
def get_pr_type_by_labels(pull):
"""Gets PR type using labels"""
pr_lables = get_pr_labels(pull)
@@ -48,19 +60,19 @@ def get_pr_type_by_labels(pull):
if not pr_types_labels:
return None
if len(pr_types_labels) > 1:
print(f'Duplicated labels: {pr_types_labels}')
print(f"Duplicated labels: {pr_types_labels}")
return PrType.BAD
return PrType(PrType(pr_types_labels.pop()))
def get_label_by_team_name_re(team_name):
"""Generates label by PR reviwer team name using regular expressions"""
if 'admins' in team_name:
return 'category: ci'
re_compile_label = re.compile(rf'{Config().GITHUB_REPO}-(.+)-maintainers')
if "admins" in team_name:
return "category: ci"
re_compile_label = re.compile(rf"{Config().GITHUB_REPO}-(.+)-maintainers")
re_label = re_compile_label.match(team_name)
if re_label:
return f'category: {re_label.group(1).strip()}'
return f"category: {re_label.group(1).strip()}"
return None
@@ -80,17 +92,105 @@ def get_category_labels(pull):
return labels
def get_pr_info_str(pull):
"""Gets info about PR using a few workarounds"""
pr_title = pull.title.encode("ASCII", "ignore").decode()
# Workaround for PyGithub issue: https://github.com/PyGithub/PyGithub/issues/512
pr_created_at = pull.created_at.replace(tzinfo=datetime.timezone.utc).astimezone()
return (
f"PR: {pull.number} - {pr_title} - Created: {pr_created_at} - "
f"Labels: {get_pr_labels(pull)} - Type: {get_pr_type_by_labels(pull)}"
)
def update_labels(gh_api, pull, non_org_intel_pr_users, non_org_pr_users):
"""Checks and updates labels"""
print("Check and update labels:")
pr_type_by_labels = get_pr_type_by_labels(pull)
add_labels = []
# Checks PR source type
if gh_api.is_org_user(pull.user):
print(" - Org user")
elif github_api.is_intel_email(pull.user.email) or github_api.is_intel_company(
pull.user.company
):
print(" - Non org user with Intel email or company")
non_org_intel_pr_users.add(pull.user)
if pr_type_by_labels is not PrType.INTEL:
print(f'NO "{PrType.INTEL.value}" label: ', end="")
github_api.print_users(pull.user)
add_labels.append(PrType.INTEL.value)
elif github_api.is_user_ignored(pull.user):
print(" - IGNORED non org user with NO Intel email or company")
else:
print(" - Non org user with NO Intel email or company")
non_org_pr_users.add(pull.user)
if pr_type_by_labels is not PrType.EXTERNAL:
print(f'NO "{PrType.EXTERNAL.value}" label: ', end="")
github_api.print_users(pull.user)
add_labels.append(PrType.EXTERNAL.value)
add_labels += get_category_labels(pull)
add_pr_labels(pull, add_labels)
def get_wrong_commits(pull):
"""Returns commits with incorrect user and email"""
pr_author_email = pull.user.email.lower()
print("GitHub PR author email:", pr_author_email)
print("Check commits:")
wrong_commits = set()
for commit in pull.get_commits():
# import pprint; pprint.pprint(commit.raw_data)
print("Commit SHA:", commit.sha)
# Use raw data because the commit author can be a non-GitHub user
commit_email = commit.raw_data["commit"]["author"]["email"].lower()
print(" Commit email:", commit_email)
if not github_api.is_valid_user(commit.author):
print(
" ERROR: User with the commit email is absent in GitHub:",
commit.raw_data["commit"]["author"]["name"],
)
wrong_commits.add(commit.sha)
if not commit.raw_data["commit"]["verification"]["verified"]:
print(
" WARNING: The commit is not verified. Reason:",
commit.raw_data["commit"]["verification"]["reason"],
)
if pr_author_email != commit_email:
print(" WARNING: Commit email and GitHub PR author public email are differnt")
return wrong_commits
def main():
"""The main entry point function"""
arg_parser = ArgumentParser()
arg_parser.add_argument("--cfg-file", metavar="PATH", default=Config.default_cfg_path,
help=f"Path to json configuration file, e.g. {Config.default_cfg_path}")
arg_parser.add_argument("--pr", metavar="NUMBER",
help="Get GitHub pull request with the number")
arg_parser.add_argument("--pr-state", default="open", choices=["open", "closed"],
help="Set GitHub pull request state")
arg_parser.add_argument("--newer", metavar="MINUTES",
help="Get newly created GitHub pull request only")
arg_parser.add_argument(
"--cfg-file",
metavar="PATH",
default=Config.default_cfg_path,
help=f"Path to json configuration file, e.g. {Config.default_cfg_path}",
)
arg_parser.add_argument(
"--pr", metavar="NUMBER", help="Get GitHub pull request with the number"
)
arg_parser.add_argument(
"--pr-state",
default="open",
choices=["open", "closed"],
help="Set GitHub pull request state",
)
arg_parser.add_argument(
"--newer", metavar="MINUTES", help="Get newly created GitHub pull request only"
)
arg_parser.add_argument(
"--check-commits",
action="store_true",
help="Check and compare git commit email with GitHub account email",
)
args, unknown_args = arg_parser.parse_known_args()
Config(args.cfg_file, unknown_args)
@@ -100,50 +200,52 @@ def main():
pulls = [gh_api.repo.get_pull(int(args.pr))]
else:
pulls = gh_api.repo.get_pulls(state=args.pr_state)
print(f'\nPRs count ({args.pr_state}):', pulls.totalCount)
print(f"\nPRs count ({args.pr_state}):", pulls.totalCount)
if args.newer:
pr_created_after = datetime.datetime.now() - datetime.timedelta(minutes=int(args.newer))
print('PRs created after:', pr_created_after)
pr_created_after = (
datetime.datetime.now() - datetime.timedelta(minutes=int(args.newer))
).astimezone()
print("Checking PRs created after:", pr_created_after)
non_org_intel_pr_users = set()
non_org_pr_users = set()
wrong_pulls = {}
for pull in pulls:
if args.newer and pull.created_at <= pr_created_after:
print(f'\nIGNORE: {pull} - Created: {pull.created_at}')
pr_created_at = pull.created_at.replace(tzinfo=datetime.timezone.utc).astimezone()
if args.newer and pr_created_at <= pr_created_after:
print(f"\nIGNORE: {get_pr_info_str(pull)}")
continue
pr_lables = get_pr_labels(pull)
pr_type_by_labels = get_pr_type_by_labels(pull)
set_labels = []
print(f'\n{pull} - Created: {pull.created_at} - Labels: {pr_lables} -',
f'Type: {pr_type_by_labels}', end='')
# Checks PR source type
if gh_api.is_org_user(pull.user):
print(' - Org user')
elif github_api.is_intel_email(pull.user.email) or \
github_api.is_intel_company(pull.user.company):
print(' - Non org user with Intel email or company')
non_org_intel_pr_users.add(pull.user)
if pr_type_by_labels is not PrType.INTEL:
print(f'NO "{PrType.INTEL.value}" label: ', end='')
github_api.print_users(pull.user)
set_labels.append(PrType.INTEL.value)
print(f"\n{get_pr_info_str(pull)}")
if args.check_commits:
wrong_commits = get_wrong_commits(pull)
if wrong_commits:
wrong_pulls[pull.number] = wrong_commits
else:
print(f' - Non org user with NO Intel email or company')
non_org_pr_users.add(pull.user)
if pr_type_by_labels is not PrType.EXTERNAL:
print(f'NO "{PrType.EXTERNAL.value}" label: ', end='')
github_api.print_users(pull.user)
set_labels.append(PrType.EXTERNAL.value)
update_labels(gh_api, pull, non_org_intel_pr_users, non_org_pr_users)
set_labels += get_category_labels(pull)
set_pr_labels(pull, set_labels)
if wrong_pulls:
for pull_number, wrong_commits in wrong_pulls.items():
print(
f"\nERROR: Remove or replace wrong commits in the PR {pull_number}:\n ",
"\n ".join(wrong_commits),
)
print(
"\nAbout commit signature verification:\n ",
"https://docs.github.com/en/github/authenticating-to-github/"
"managing-commit-signature-verification/about-commit-signature-verification",
)
sys.exit(1)
print(f'\nNon org user with Intel email or company:')
github_api.print_users(non_org_intel_pr_users)
print(f'\nNon org user with NO Intel email or company:')
github_api.print_users(non_org_pr_users)
if non_org_intel_pr_users:
print("\nNon org user with Intel email or company:")
github_api.print_users(non_org_intel_pr_users)
if non_org_pr_users:
print("\nNon org user with NO Intel email or company:")
github_api.print_users(non_org_pr_users)
if __name__ == '__main__':
if __name__ == "__main__":
main()
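One detail worth calling out in the rewrite above: PyGithub returns naive datetimes that represent UTC (see the linked PyGithub issue 512), so both `created_at` and the `--newer` cutoff are normalized to timezone-aware local times before being compared. A small sketch of that conversion, with an invented timestamp:

```python
import datetime

# PyGithub's created_at is naive but holds UTC, so tag it as UTC first,
# then convert to the local timezone before comparing.
naive_utc = datetime.datetime(2021, 6, 8, 0, 49)  # invented example value
aware_local = naive_utc.replace(tzinfo=datetime.timezone.utc).astimezone()

cutoff = (datetime.datetime.now() - datetime.timedelta(minutes=60)).astimezone()
print(aware_local > cutoff)  # both sides are aware, so the comparison is valid
```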

View File

@@ -6,7 +6,9 @@
"openvino-ci",
"openvino-pushbot",
"lab-nerval",
"lab-nerval-onnx-ci"
"lab-nerval-onnx-ci",
"onnx-watchdog-agent",
"dependabot"
],
"EMAILS_FILE_PATH": "dev_emails-test.txt",
"PROXIES": {

View File

@@ -57,19 +57,19 @@ class Config:
for name, value in self._json_cfg.items():
if hasattr(self, name):
raise ConfigException(f'Duplicating property: {name}')
prosperity_value = self._args.get(name) or os.getenv(name)
if prosperity_value:
property_value = self._args.get(name) or os.getenv(name)
if property_value:
# Try to set property_value as Python literal structures, e.g. DRY_RUN=False
try:
prosperity_value = ast.literal_eval(prosperity_value)
property_value = ast.literal_eval(property_value)
except Exception:
pass
if not isinstance(prosperity_value, type(value)):
if not isinstance(property_value, type(value)):
raise ConfigException(f'Python type of {name} parameter must be {type(value)}')
else:
prosperity_value = value
setattr(self, name, prosperity_value)
Config.properties[name] = prosperity_value
property_value = value
setattr(self, name, property_value)
Config.properties[name] = property_value
self.set_proxy()
@@ -78,7 +78,7 @@ class Config:
try:
with open(self._file_path) as conf:
self._json_cfg = json.load(conf)
except:
except Exception:
print('Failed to load configuration from:', self._file_path)
raise
@@ -105,7 +105,7 @@ class Config:
def _test():
"""Test and debug"""
print('Config.default_cfg_path:', Config.default_cfg_path)
cfg = Config(cli_args=['DRY_RUN=True'])
cfg = Config(cli_args=['DRY_RUN', 'PROXIES={"NO_PROXY": "localhost"}'])
print('Config.properties:', cfg.get_properties())
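The renamed `property_value` path also explains the new `_test()` arguments: overrides arrive as strings from the CLI or environment, and `ast.literal_eval` turns them back into Python values before the type check against the json default. A standalone sketch of that coercion, under the same assumptions as the class above:

```python
import ast

def coerce(raw, default):
    """Parse a string override, keeping the default's Python type (sketch)."""
    try:
        value = ast.literal_eval(raw)  # e.g. "False" -> False, '{"A": 1}' -> dict
    except (ValueError, SyntaxError):
        value = raw                    # plain strings stay strings
    if not isinstance(value, type(default)):
        raise TypeError(f"type of override must be {type(default)}")
    return value

print(coerce("False", True))                    # False
print(coerce('{"NO_PROXY": "localhost"}', {}))  # {'NO_PROXY': 'localhost'}
```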

View File

@@ -11,7 +11,6 @@ import re
import time
from github import Github, GithubException, RateLimitExceededException, IncompletableObject
from github import UnknownObjectException
from github.PaginatedList import PaginatedList
from configs import Config
@@ -110,17 +109,13 @@ class GithubOrgApi:
def is_org_user(self, user):
"""Checks that user is a member of GitHub organization"""
if is_valid_user(user):
try:
membership = user.get_organization_membership(self.github_org)
# membership.role can be 'member' or 'admin'
if membership.state == 'active' and membership.role:
return True
except UnknownObjectException:
pass
# user.get_organization_membership(self.github_org) doesn't work with org members
# permissions, GITHUB_TOKEN must be org owner now
return self.github_org.has_in_members(user)
return False
def get_org_emails(self):
"""Gets and prints all emails of GitHub organization members"""
"""Gets and prints emails of all GitHub organization members"""
org_members = self.github_org.get_members()
org_emails = set()
org_members_fix = set()
@@ -146,7 +141,7 @@ class GithubOrgApi:
'; '.join(org_logins_fix_intel_email))
print(f'\nOrg members - no real name {len(org_emails_fix_name)}:',
'; '.join(org_emails_fix_name))
return org_emails
return (org_emails, org_logins_fix_intel_email)
def get_org_invitation_emails(self):
"""Gets GitHub organization teams prints info"""

236 .github/org_control/ldap_api.py vendored Normal file
View File

@@ -0,0 +1,236 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
"""
Gets info about users and groups via LDAP
"""
# pylint: disable=fixme,no-member
from enum import Enum
from ldap3 import Server, Connection, ALL, SUBTREE
from configs import Config
class LdapApiException(Exception):
"""Base LDAP API exception"""
class InfoLevel(Enum):
"""Constants for printing user info from LDAP"""
PDL = 'PDL' # Public Distribution List (group of e-mail addresses)
FULL = 'Full'
def print_user_info(info, info_level=None):
"""Pretty-print of a user info data structure (dict). info_level is the InfoLevel Enum"""
if not info or not info.get('mail'):
raise LdapApiException('ERROR: No info or absent mail')
def get_membership():
if info_level == InfoLevel.PDL:
membership_info = ' PDLs:'
elif info_level == InfoLevel.FULL:
membership_info = ' memberOf :'
else:
return ''
# Grouping groups by purpose
if info_level == InfoLevel.PDL:
sort_key = lambda i: i.split(',', 1)[0].lower()
else:
sort_key = lambda i: i.split(',', 1)[1] + i.split(',', 1)[0].lower()
for item in sorted(info['memberOf'], key=sort_key):
if info_level == InfoLevel.PDL and 'OU=Delegated' not in item:
continue
membership_info += f'\n {item}'
return membership_info
try:
text_info = \
f'\n{info["cn"]} <{info["mail"]}>; {info["sAMAccountName"]}; {info["employeeID"]}' \
f'\n Org group: {info["intelSuperGroupDescr"]} ({info["intelSuperGroupShortName"]}) /'\
f' {info["intelGroupDescr"]} ({info["intelGroupShortName"]}) /' \
f' {info["intelDivisionDescr"]} ({info["intelDivisionShortName"]}) /' \
f' {info["intelOrgUnitDescr"]}' \
f'\n Manager: {info["manager"]}' \
f'\n Location: {info["intelRegionCode"]} / {info["co"]} / {info["intelSiteCode"]} /' \
f' {info["intelBldgCode"]} ({info["intelSiteName"]}) /' \
f' {info["physicalDeliveryOfficeName"]}' \
f'\n Other: {info["employeeType"]} | {info["intelExportCountryGroup"]} |' \
f' {info["whenCreated"]} | {info["intelCostCenterDescr"]} | {info["jobDescription"]}'
except Exception as exc:
raise LdapApiException(f'ERROR: Failed to get info about "{info["mail"]}". ' \
f'Exception occurred:\n{repr(exc)}') from exc
print(text_info)
membership = get_membership()
if info_level == InfoLevel.PDL and membership:
print(membership)
elif info_level == InfoLevel.FULL:
for key in sorted(info):
if isinstance(info[key], list):
if key == 'memberOf':
print(membership)
else:
print(f' {key} :')
for item in info[key]:
print(' ', item)
else:
print(f' {key} : {info[key]}')
class LdapApi:
"""LDAP API for getting user info and emails"""
_binary_blobs = ['thumbnailPhoto', 'msExchUMSpokenName', 'msExchBlockedSendersHash']
_check_existing = [
'intelExportCountryGroup',
'physicalDeliveryOfficeName',
'intelSuperGroupShortName',
'intelGroupShortName',
'intelDivisionShortName',
]
null = '<null>'
def __init__(self):
self._cfg = Config()
self.server = Server(self._cfg.LDAP_SERVER, get_info=ALL)
self.connection = Connection(self.server,
user=self._cfg.LDAP_USER,
password=self._cfg.LDAP_PASSWORD,
auto_bind=True)
self.connection.bind()
def get_user_emails(self, groups=None):
"""Gets emails of LDAP groups and sub-groups"""
print('\nGet emails from LDAP groups:')
processed_ldap_members = {}
def process_group_members(member, parent_group):
if member in processed_ldap_members:
processed_ldap_members[member]['parent_groups'].append(parent_group)
print('\nWARNING: Ignore LDAP member to avoid duplication and recursive cycling '
f'of PDLs: {member}\n '
f'email: {processed_ldap_members[member].get("email")}\n parent_groups:')
for group in processed_ldap_members[member].get('parent_groups', []):
print(7 * ' ', group)
return
processed_ldap_members[member] = {'email': None, 'parent_groups': [parent_group]}
# AD moves terminated users to the boneyard OU in case the user returns,
# so it can be reactivated with little effort.
# After 30 days it is removed and the unix personality becomes unlinked.
if 'OU=Boneyard' in member:
return
self.connection.search(member, r'(objectClass=*)', SUBTREE,
attributes=['cn', 'member', 'mail'])
#print(self.connection.entries)
if not self.connection.response:
raise LdapApiException(f'ERROR: empty response. LDAP member: {member}')
# Check that the member is a worker.
# The response can contain several items, but only the first item is valid
if 'OU=Workers' in member:
if self.connection.response[0]['attributes']['mail']:
processed_ldap_members[member]['email'] = \
self.connection.response[0]['attributes']['mail'].lower()
return
raise LdapApiException(f'ERROR: no mail. LDAP worker: {member}\n'
f'{self.connection.entries}')
if len(self.connection.response) > 1:
raise LdapApiException(f'ERROR: multiple responses for {member}: '
f'{len(self.connection.response)}\n'
f'{self.connection.entries}')
if self.connection.response[0]['attributes']['member']:
for group_member in self.connection.response[0]['attributes']['member']:
process_group_members(group_member, member)
else:
print(f'\nERROR: no members in LDAP group: {member}\n{self.connection.entries}')
for group in groups or self._cfg.LDAP_PDLs:
print('\nProcess ROOT LDAP group:', group)
process_group_members(group, 'ROOT')
return {
member.get('email') for member in processed_ldap_members.values() if member.get('email')
}
def _get_user_info(self, query):
"""Gets user info from LDAP as dict matching key and values pairs from query"""
query_filter = ''.join(f'({key}={value})' for key, value in query.items())
for domain in self._cfg.LDAP_DOMAINS:
search_base = f'OU=Workers,DC={domain},DC=corp,DC=intel,DC=com'
self.connection.search(
search_base,
f'(&(objectcategory=person)(objectclass=user)(intelflags=1){query_filter})',
SUBTREE,
attributes=['*'])
if self.connection.response:
if len(self.connection.response) > 1:
raise LdapApiException(f'ERROR: multiple responses for {query_filter}: '
f'{len(self.connection.response)}\n'
f'{self.connection.entries}')
info = self.connection.response[0]['attributes']
# remove long binary blobs
for blob in LdapApi._binary_blobs:
info[blob] = b''
for key in LdapApi._check_existing:
if not info.get(key):
info[key] = LdapApi.null
return info
return {}
def get_user_info_by_idsid(self, idsid):
"""Gets user info from LDAP as dict using account name for searching"""
return self._get_user_info({'sAMAccountName': idsid})
def get_user_info_by_name(self, name):
"""Gets user info from LDAP as dict using common name for searching"""
return self._get_user_info({'cn': name})
def get_user_info_by_email(self, email):
"""Gets user info from LDAP as dict using emails for searching"""
return self._get_user_info({'mail': email})
def get_absent_emails(self, emails):
"""Checks users by email in LDAP and returns absent emails"""
absent_emails = set()
for email in emails:
if not self.get_user_info_by_email(email):
absent_emails.add(email)
return absent_emails
def _test():
"""Test and debug"""
ldap = LdapApi()
emails = ldap.get_user_emails()
print(f'\nLDAP emails count: {len(emails)}\n{"; ".join(emails)}')
emails = ['foo@intel.com']
for email in emails:
info = ldap.get_user_info_by_email(email)
if info:
print_user_info(info, InfoLevel.PDL)
else:
print(f'\n{email} - not found')
if __name__ == '__main__':
_test()
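The two `sort_key` lambdas in `get_membership()` above split an LDAP distinguished name on its first comma: the PDL view sorts by CN alone, while the FULL view groups entries by everything after the CN first. A tiny sketch with invented DNs:

```python
# Invented distinguished names, only to show what the sort keys see.
dns = [
    "CN=dev-list,OU=Delegated,DC=corp",
    "CN=Admins,OU=Groups,DC=corp",
]

pdl_key = lambda i: i.split(",", 1)[0].lower()                        # CN only
full_key = lambda i: i.split(",", 1)[1] + i.split(",", 1)[0].lower()  # OU/DC path first

print(sorted(dns, key=pdl_key)[0])    # CN=Admins,OU=Groups,DC=corp
print(sorted(dns, key=full_key)[0])   # CN=dev-list,OU=Delegated,DC=corp
```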

View File

@@ -0,0 +1 @@
pylint==2.5.3

View File

@@ -1 +1,2 @@
PyGithub==1.51
ldap3==2.7

View File

@@ -1 +0,0 @@
pylint==2.3.0

View File

@@ -3,6 +3,7 @@ on: [push, pull_request]
jobs:
Build_Doc:
if: github.repository == 'openvinotoolkit/openvino'
runs-on: ubuntu-20.04
steps:
- name: Clone OpenVINO
@@ -38,6 +39,7 @@ jobs:
working-directory: build
- name: 'Upload doc'
if: github.event_name == 'push'
uses: actions/upload-artifact@v2
with:
name: openvino_doc

17 .github/workflows/check_pr_commits.yml vendored Normal file
View File

@@ -0,0 +1,17 @@
name: PR Commits
on: [pull_request]
jobs:
Checks:
runs-on: ubuntu-20.04
steps:
- name: Clone OpenVINO
uses: actions/checkout@v2
- name: Install dependencies
run: python3 -m pip install -r ./.github/org_control/requirements.txt
- name: PR commits
run: python3 ./.github/org_control/check_pr.py --pr=${{ github.event.number }} --check-commits DRY_RUN
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

4 .gitmodules vendored
View File

@@ -18,3 +18,7 @@
path = thirdparty/xbyak
url = https://github.com/herumi/xbyak.git
ignore = dirty
[submodule "thirdparty/zlib/zlib"]
path = thirdparty/zlib/zlib
url = https://github.com/madler/zlib.git
ignore = dirty

View File

@@ -52,7 +52,6 @@ function(build_ngraph)
else ()
ngraph_set(NGRAPH_ADDRESS_SANITIZER OFF)
endif ()
ngraph_set(NGRAPH_PYTHON_BUILD_ENABLE OFF)
if(ENABLE_TESTS AND NOT ANDROID)
ngraph_set(NGRAPH_UNIT_TEST_ENABLE ON)
@@ -85,6 +84,12 @@ function(build_ngraph)
ngraph_set(NGRAPH_THREAD_SANITIZER_ENABLE OFF)
endif()
if(ENABLE_PYTHON)
ngraph_set(NGRAPH_PYTHON_BUILD_ENABLE ON)
else()
ngraph_set(NGRAPH_PYTHON_BUILD_ENABLE OFF)
endif()
if(CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$")
ie_add_compiler_flags(-Wno-error=uninitialized -Wno-error=literal-conversion)
elseif(UNIX)

View File

@@ -223,6 +223,7 @@ include(api_validator/api_validator)
include(vs_version/vs_version)
include(plugins/plugins)
include(add_ie_target)
include(CMakePackageConfigHelpers)
if(ENABLE_FUZZING)
enable_fuzzing()

View File

@@ -23,7 +23,7 @@ if (ENABLE_CLANG_FORMAT)
endif()
endif()
if(ENABLE_CLANG_FORMAT)
if(ENABLE_CLANG_FORMAT AND NOT TARGET clang_format_check_all)
add_custom_target(clang_format_check_all)
add_custom_target(clang_format_fix_all)
set_target_properties(clang_format_check_all clang_format_fix_all

View File

@@ -211,6 +211,16 @@ set(CMAKE_CXX_VISIBILITY_PRESET hidden)
set(CMAKE_C_VISIBILITY_PRESET hidden)
set(CMAKE_VISIBILITY_INLINES_HIDDEN ON)
function(ie_python_minimal_api target)
# pybind11 uses a lot of API which is not a part of minimal python API subset
# Ref 1: https://docs.python.org/3.11/c-api/stable.html
# Ref 2: https://github.com/pybind/pybind11/issues/1755
# target_compile_definitions(${target} PRIVATE Py_LIMITED_API=0x03090000)
# if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# target_compile_options(${target} PRIVATE "-Wno-unused-variable")
# endif()
endfunction()
if(WIN32)
ie_add_compiler_flags(-D_CRT_SECURE_NO_WARNINGS -D_SCL_SECURE_NO_WARNINGS)
ie_add_compiler_flags(/EHsc) # no asynchronous structured exception handling

View File

@@ -3,15 +3,15 @@
#
if(ENABLE_CPPLINT)
find_package(Python3 COMPONENTS Interpreter)
find_package(PythonInterp 3 QUIET)
if(NOT Python3_Interpreter_FOUND)
if(NOT PYTHONINTERP_FOUND)
message(WARNING "Python3 interpreter was not found (required for cpplint check)")
set(ENABLE_CPPLINT OFF)
endif()
endif()
if(ENABLE_CPPLINT)
if(ENABLE_CPPLINT AND NOT TARGET cpplint_all)
add_custom_target(cpplint_all ALL)
set_target_properties(cpplint_all PROPERTIES FOLDER cpplint)
set(CPPLINT_ALL_OUTPUT_FILES "" CACHE INTERNAL "All cpplint output files")
@@ -68,6 +68,7 @@ function(add_cpplint_target TARGET_NAME)
"${output_file}"
COMMAND
"${CMAKE_COMMAND}"
-D "PYTHON_EXECUTABLE=${PYTHON_EXECUTABLE}"
-D "CPPLINT_SCRIPT=${IEDevScripts_DIR}/cpplint/cpplint.py"
-D "INPUT_FILE=${source_file}"
-D "OUTPUT_FILE=${output_file}"

View File

@@ -25,7 +25,7 @@ set(FILTER "${DEFAULT_FILTER}${CUSTOM_FILTER}")
execute_process(
COMMAND
python3
"${PYTHON_EXECUTABLE}"
"${CPPLINT_SCRIPT}"
"--linelength=160"
"--counting=detailed"

View File

@@ -75,11 +75,6 @@ macro(ie_parse_ci_build_number)
set(IE_VERSION "${IE_VERSION_MAJOR}.${IE_VERSION_MINOR}.${IE_VERSION_PATCH}")
endmacro()
# WA for DL Benchmark
if(DEFINED ENV{CI_BUILD_NUMBER} AND "$ENV{CI_BUILD_NUMBER}" STREQUAL "1")
unset(ENV{CI_BUILD_NUMBER})
endif()
if (DEFINED ENV{CI_BUILD_NUMBER})
set(CI_BUILD_NUMBER $ENV{CI_BUILD_NUMBER})
else()

View File

@@ -29,10 +29,14 @@ Usage: -DSELECTIVE_BUILD=ON -DSELECTIVE_BUILD_STAT=/path/*.csv" OFF
ie_option(ENABLE_ERROR_HIGHLIGHT "Highlight errors and warnings during compile time" OFF)
# Try to find python3
find_package(PythonLibs 3 QUIET)
ie_dependent_option (ENABLE_PYTHON "enables ie python bridge build" OFF "PYTHONLIBS_FOUND" OFF)
#
# enable or disable output from NGRAPH_DEBUG statements
#
if(NGRAPH_DEBUG_ENABLE)
add_definitions(-DNGRAPH_DEBUG_ENABLE)
endif()

View File

@@ -52,19 +52,11 @@ set(GST_DOCS_DIR "" CACHE PATH "Path to gst-video-analytics documentation")
function(build_docs)
find_package(Doxygen REQUIRED dot)
find_package(Python3 COMPONENTS Interpreter)
find_package(LATEX)
if(NOT DOXYGEN_FOUND)
message(FATAL_ERROR "Doxygen is required to build the documentation")
endif()
if(NOT Python3_FOUND)
message(FATAL_ERROR "Python3 is required to build the documentation")
endif()
find_package(PythonInterp 3 REQUIRED)
find_package(LATEX REQUIRED)
execute_process(
COMMAND ${Python3_EXECUTABLE} -m pip show lxml
COMMAND ${PYTHON_EXECUTABLE} -m pip show lxml
RESULT_VARIABLE PIP_EXIT_CODE
OUTPUT_QUIET
)
@@ -73,10 +65,6 @@ function(build_docs)
message(FATAL_ERROR "lxml package is not installed. Please use \"pip install lxml\".")
endif()
if(NOT LATEX_FOUND)
message(FATAL_ERROR "LATEX is required to build the documentation")
endif()
set(DOCS_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}")
set(DOXYGEN_DIR "${OpenVINO_MAIN_SOURCE_DIR}/docs/doxygen")
set(IE_SOURCE_DIR "${OpenVINO_MAIN_SOURCE_DIR}/inference-engine")
@@ -217,7 +205,7 @@ function(build_docs)
add_custom_command(TARGET py_api
PRE_BUILD
COMMAND ${Python3_EXECUTABLE} ${PYX_FILTER} ${PYTHON_API_OUT}
COMMAND ${PYTHON_EXECUTABLE} ${PYX_FILTER} ${PYTHON_API_OUT}
COMMENT "Pre-process Python API")
# Preprocess docs
@@ -317,8 +305,8 @@ function(build_docs)
add_custom_command(TARGET preprocess_docs
PRE_BUILD
${commands}
COMMAND ${Python3_EXECUTABLE} ${DOXY_LAYOUT_SCRIPT} --openvino ${OPENVINO_LAYOUT_BUILD}
COMMAND ${Python3_EXECUTABLE} ${DOXY_MD_FILTER} ${DOCS_BUILD_DIR}
COMMAND ${PYTHON_EXECUTABLE} ${DOXY_LAYOUT_SCRIPT} --openvino ${OPENVINO_LAYOUT_BUILD}
COMMAND ${PYTHON_EXECUTABLE} ${DOXY_MD_FILTER} ${DOCS_BUILD_DIR}
COMMENT "Pre-process markdown and image links")
# IE dev guide and C++ API
@@ -353,7 +341,7 @@ function(build_docs)
add_custom_command(TARGET openvino_docs
POST_BUILD
COMMAND ${Python3_EXECUTABLE} ${DOXY_LOG_SCRIPT} --log "${DOCS_BUILD_DIR}/ie_docs.log"
COMMAND ${PYTHON_EXECUTABLE} ${DOXY_LOG_SCRIPT} --log "${DOCS_BUILD_DIR}/ie_docs.log"
--include_omz $<BOOL:${OMZ_DOCS_DIR}>
--include_wb $<BOOL:${WORKBENCH_DOCS_DIR}>
--include_pot $<BOOL:${POT_DOCS_DIR}>
@@ -365,7 +353,7 @@ function(build_docs)
if(EXISTS "${LINKCHECKER_PY}")
add_custom_target(docs_check
COMMAND ${Python3_EXECUTABLE} "${LINKCHECKER_PY}" -v "${DOCS_BUILD_DIR}/html/"
COMMAND ${PYTHON_EXECUTABLE} "${LINKCHECKER_PY}" -v "${DOCS_BUILD_DIR}/html/"
COMMENT "Check links in generated documentation"
WORKING_DIRECTORY "${DOCS_BUILD_DIR}"
VERBATIM)

View File

@@ -51,7 +51,7 @@ Intel® Core™ i3-8121U Processor
Intel® GNA hardware requires a driver to be installed on the system.
* Linux\* OS:
[Download Intel® GNA driver for Ubuntu Linux 18.04.3 LTS (with HWE Kernel version 5.0+)](https://download.01.org/opencv/drivers/gna/)
[Download Intel® GNA driver for Ubuntu Linux 18.04.3 LTS (with HWE Kernel version 5.4+)](https://storage.openvinotoolkit.org/drivers/gna/)
* Windows\* OS:
Intel® GNA driver for Windows is available through Windows Update\*

View File

@@ -500,6 +500,7 @@ Standard ONNX\* operators:
| Sigmoid | No |
| Sign | No |
| Sin | No |
| Size | No |
| Slice | No |
| Softmax | No |
| Softplus | No |

View File

@@ -2,7 +2,7 @@
The Intel® Distribution of OpenVINO™ toolkit quickly deploys applications and solutions that emulate human vision. Based on Convolutional Neural Networks (CNN), the toolkit extends computer vision (CV) workloads across Intel® hardware, maximizing performance. The Intel® Distribution of OpenVINO™ toolkit includes the Intel® Deep Learning Deployment Toolkit.
This guide provides the steps for creating a Docker* image with Intel® Distribution of OpenVINO™ toolkit for Linux* and further installation.
This guide provides device specifics for a Docker* image creation with Intel® Distribution of OpenVINO™ toolkit for Linux* and its further usage.
## System Requirements
@@ -10,25 +10,31 @@ This guide provides the steps for creating a Docker* image with Intel® Distribu
- Ubuntu\* 18.04 long-term support (LTS), 64-bit
- Ubuntu\* 20.04 long-term support (LTS), 64-bit
- CentOS\* 7.6
- Red Hat* Enterprise Linux* 8.2 (64 bit)
- CentOS\* 7
- Red Hat\* Enterprise Linux* 8 (64 bit)
**Host Operating Systems**
- Linux with installed GPU driver and with Linux kernel supported by GPU driver
- Linux
## Prebuilt images
Prebuilt images are available on:
- [Docker Hub](https://hub.docker.com/u/openvino)
- [Red Hat* Quay.io](https://quay.io/organization/openvino)
- [Red Hat* Ecosystem Catalog](https://catalog.redhat.com/software/containers/intel/openvino-runtime/606ff4d7ecb5241699188fb3)
## Build a Docker* Image
You can use [available Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles) or generate a Dockerfile with your setting via [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci). The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit.
You can also try our [Tutorials](https://github.com/openvinotoolkit/docker_ci/tree/master/docs/tutorials) which demonstrate the usage of OpenVINO™ Docker containers.
## Use Docker* Image for CPU
- The kernel reports the same information for all containers as for a native application, for example, CPU and memory information.
- All instructions available to the host process are also available to processes in the container, including, for example, AVX2 and AVX512. There are no restrictions.
- Docker* does not use virtualization or emulation. The process in Docker* is just a regular Linux process, but it is isolated from external world on kernel level. Performance penalty is small.
- Docker\* does not use virtualization or emulation. The process in Docker* is just a regular Linux process, but it is isolated from external world on kernel level. Performance penalty is small.
### <a name="building-for-cpu"></a>Build a Docker* Image for CPU
@@ -155,7 +161,6 @@ ARG BUILD_DEPENDENCIES="autoconf \
unzip \
udev"
# hadolint ignore=DL3031, DL3033
RUN yum update -y && yum install -y ${BUILD_DEPENDENCIES} && \
yum group install -y "Development Tools" && \
yum clean all && rm -rf /var/cache/yum
@@ -248,12 +253,14 @@ $HDDL_INSTALL_DIR/hddldaemon
```
### Run the Docker* Image for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs
To run the built Docker* image for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs, use the following command:
```sh
docker run -it --rm --device=/dev/ion:/dev/ion -v /var/tmp:/var/tmp <image_name>
```
> **NOTES**:
>
> - The device `/dev/ion` need to be shared to be able to use ion buffers among the plugin, `hddldaemon` and the kernel.
> - Since separate inference tasks share the same HDDL service communication interface (the service creates mutexes and a socket file in `/var/tmp`), `/var/tmp` needs to be mounted and shared among them.
@@ -262,6 +269,7 @@ In some cases, the ion driver is not enabled (for example, due to a newer kernel
docker run -it --rm --net=host -v /var/tmp:/var/tmp ipc=host <image_name>
```
> **NOTES**:
>
> - When building Docker images, create a user in the Dockerfile that has the same UID and GID as the user that runs hddldaemon on the host.
> - Run the application in the container as this user.
> - Alternatively, you can start hddldaemon with the root user on host, but this approach is not recommended.
@@ -310,10 +318,6 @@ If you got proxy issues, please setup proxy settings for Docker. See the Proxy s
* [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci) for Intel® Distribution of OpenVINO™ toolkit. The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. You can reuse available Dockerfiles, add your layer and customize the image of OpenVINO™ for your needs.
* Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit)
* OpenVINO™ toolkit documentation: [https://docs.openvinotoolkit.org](https://docs.openvinotoolkit.org)
* Intel® Neural Compute Stick 2 Get Started: [https://software.intel.com/en-us/neural-compute-stick/get-started](https://software.intel.com/en-us/neural-compute-stick/get-started)
* Intel® Distribution of OpenVINO™ toolkit Docker Hub* home page: [https://hub.docker.com/u/openvino](https://hub.docker.com/u/openvino)

View File

@@ -2,7 +2,7 @@
The Intel® Distribution of OpenVINO™ toolkit quickly deploys applications and solutions that emulate human vision. Based on Convolutional Neural Networks (CNN), the toolkit extends computer vision (CV) workloads across Intel® hardware, maximizing performance. The Intel® Distribution of OpenVINO™ toolkit includes the Intel® Deep Learning Deployment Toolkit.
This guide provides the steps for creating a Docker* image with Intel® Distribution of OpenVINO™ toolkit for Windows* and further installation.
This guide provides device specifics for a Docker* image creation with Intel® Distribution of OpenVINO™ toolkit for Windows* and its further usage.
## System Requirements
@@ -13,19 +13,22 @@ This guide provides the steps for creating a Docker* image with Intel® Distribu
**Host Operating Systems**
- Windows 10*, 64-bit Pro, Enterprise or Education (1607 Anniversary Update, Build 14393 or later) editions
- Windows Server* 2016 or higher
## Prebuilt Images
Prebuilt images are available on [Docker Hub](https://hub.docker.com/u/openvino).
## Build a Docker* Image for CPU
## Build a Docker* Image
You can use [available Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles) or generate a Dockerfile with your setting via [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci) for Intel® Distribution of OpenVINO™ toolkit.
The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit.
You can use [available Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles) or generate a Dockerfile with your setting via [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci). The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit.
## Build and Run the Docker* Image for CPU
## Install Additional Dependencies
### Install CMake
To add CMake to the image, add the following commands to the Dockerfile:
~~~
RUN powershell.exe -Command `
@@ -42,6 +45,7 @@ docker build . -t <image_name> `
~~~
### Install Microsoft Visual Studio* Build Tools
You can add Microsoft Visual Studio Build Tools* to a Windows* OS Docker image using either the offline installer for Build Tools
(follow the [Instruction for the offline installer](https://docs.microsoft.com/en-us/visualstudio/install/create-an-offline-installation-of-visual-studio?view=vs-2019)) or
the online installer for Build Tools (follow the [Instruction for the online installer](https://docs.microsoft.com/en-us/visualstudio/install/build-tools-container?view=vs-2019)).
@@ -79,6 +83,7 @@ docker run -itu ContainerAdministrator --rm <image_name> cmd /S /C "cd deploymen
## Build and Run the Docker* Image for GPU
The GPU acceleration feature in Windows containers requires that the Windows host, OpenVINO toolkit, and Docker* meet the following requirements:
* [Windows requirements](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/gpu-acceleration):
* The container host must be running Windows Server 2019 or Windows 10 of version 1809 or higher.
* The container base image must be `mcr.microsoft.com/windows:1809` or higher. Windows Server Core and Nano Server container images are not currently supported.
@@ -142,8 +147,4 @@ If you got proxy issues, please setup proxy settings for Docker. See the Proxy s
* [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci) for Intel® Distribution of OpenVINO™ toolkit. The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. You can reuse available Dockerfiles, add your layer and customize the image of OpenVINO™ for your needs.
* Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit)
* OpenVINO™ toolkit documentation: [https://docs.openvinotoolkit.org](https://docs.openvinotoolkit.org)
* Intel® Distribution of OpenVINO™ toolkit Docker Hub* home page: [https://hub.docker.com/u/openvino](https://hub.docker.com/u/openvino)

View File

@@ -7,31 +7,35 @@
**Short description**: *NonZero* returns the indices of the non-zero elements of the input tensor.
**Detailed description**: *NonZero* returns the indices of the non-zero elements of the input tensor (in row-major order - by dimension).
The output tensor has shape `[rank(input), num_non_zero]`. For example, for the tensor `[[1, 0], [1, 1]]` the output will be `[[0, 1, 1], [0, 0, 1]]`.
* The output tensor has shape `[rank(input), num_non_zero]`.
* For example, for the tensor `[[1, 0], [1, 1]]` the output will be `[[0, 1, 1], [0, 0, 1]]`.
* The output is a collection of tuples, each tuple has `rank(input)` elements and contains indices for a single non-zero element.
* The `i`'th element of each output dimension is a part of the `i`'th tuple.
* In the given example the tuples would be: `[0, 0]`, `[1, 0]`, `[1, 1]` (see the NumPy sketch below).
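The example above can be reproduced with NumPy's `nonzero`, which uses the same row-major index layout (a sketch for illustration, not part of the operation's normative definition):

```python
import numpy as np

data = np.array([[1, 0], [1, 1]])
indices = np.nonzero(data)   # tuple with one index array per dimension
print(np.stack(indices))     # [[0 1 1]
                             #  [0 0 1]] -> shape [rank(input), num_non_zero]
```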
**Attributes**
* *output_type*
* **Description**: the output tensor type
* **Range of values**: "i64" or "i32"
* **Range of values**: `i64` or `i32`
* **Type**: string
* **Default value**: "i64"
* **Required**: *No*
**Inputs**:
* **1**: `data` tensor of arbitrary rank of type *T*. Required.
* **1**: A tensor of type *T* and arbitrary shape. **Required**.
**Outputs**:
* **1**: tensor with indices of non-zero elements of shape `[rank(data), num_non_zero]` of type *T_IND*.
* **1**: tensor with indices of non-zero elements of shape `[rank(data), num_non_zero]` of type *T_OUT*.
**Types**
* *T*: any type.
* *T_IND*: `int64` or `int32`.
* *T_OUT*: Depending on *output_type* attribute can be `int64` or `int32`.
**Example**
@@ -53,4 +57,4 @@ The output tensor has shape `[rank(input), num_non_zero]`. For example, for the
</port>
</output>
</layer>
```
```

View File

@@ -6,7 +6,7 @@
**Short description**: *RegionYolo* computes the coordinates of regions with probability for each class.
**Detailed description**: This operation is directly mapped to the original YOLO layer. [Reference](https://arxiv.org/pdf/1612.08242.pdf)
**Detailed description**: This operation is directly mapped to the [YOLO9000: Better, Faster, Stronger](https://arxiv.org/pdf/1612.08242.pdf) paper.
**Attributes**:
@@ -78,14 +78,17 @@
**Inputs**:
* **1**: `data` - 4D input tensor with floating point elements and shape `[N, C, H, W]`. Required.
* **1**: `data` - 4D tensor of type `T` and shape `[N, C, H, W]`. **Required.**
**Outputs**:
* **1**: output tensor of rank 4 or less that codes detected regions. Refer to the original YOLO paper to decode the output as boxes. `anchors` should be used to decode real box coordinates. If `do_softmax` is set to 0, then the output shape is `[N, (classes + coords + 1)*len(mask), H, W]`. If `do_softmax` is set to 1, then output shape is partially flattened and defined in the following way:
* **1**: tensor of type `T` and rank 4 or less that codes detected regions. Refer to the [YOLO9000: Better, Faster, Stronger](https://arxiv.org/pdf/1612.08242.pdf) paper to decode the output as boxes. `anchors` should be used to decode real box coordinates. If `do_softmax` is set to `0`, then the output shape is `[N, (classes + coords + 1) * len(mask), H, W]`. If `do_softmax` is set to `1`, then output shape is partially flattened and defined in the following way:
flat_dim = data.shape[axis] * data.shape[axis+1] * ... * data.shape[end_axis]
output.shape = [data.shape[0], ..., data.shape[axis-1], flat_dim, data.shape[end_axis + 1], ...]
`flat_dim = data.shape[axis] * data.shape[axis+1] * ... * data.shape[end_axis]`
`output.shape = [data.shape[0], ..., data.shape[axis-1], flat_dim, data.shape[end_axis + 1], ...]`
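As a worked example of the `do_softmax` = 1 flattening (invented shape and attribute values, shown only to illustrate the formula above):

```python
from functools import reduce

data_shape = [1, 255, 26, 26]   # invented N, C, H, W
axis, end_axis = 1, 3           # invented attribute values

flat_dim = reduce(lambda a, b: a * b, data_shape[axis:end_axis + 1])
output_shape = data_shape[:axis] + [flat_dim] + data_shape[end_axis + 1:]
print(output_shape)             # [1, 172380]
```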
**Types**
* *T*: any supported floating point type.
**Example**

View File

@@ -21,7 +21,7 @@ function(ie_developer_export)
set(all_dev_targets gflags ie_libraries)
foreach(component IN LISTS openvino_export_components)
export(TARGETS ${${component}} NAMESPACE IE::
APPEND FILE "${CMAKE_BINARY_DIR}/${component}_dev_targets.cmake")
list(APPEND all_dev_targets ${${component}})
endforeach()
@@ -72,6 +72,18 @@ endif()
ie_cpack_add_component(cpp_samples DEPENDS core)
install(DIRECTORY ../thirdparty/zlib
DESTINATION ${IE_CPACK_IE_DIR}/samples/cpp/thirdparty
COMPONENT cpp_samples
USE_SOURCE_PERMISSIONS
PATTERN .clang-format EXCLUDE)
install(DIRECTORY ../thirdparty/cnpy
DESTINATION ${IE_CPACK_IE_DIR}/samples/cpp/thirdparty
COMPONENT cpp_samples
USE_SOURCE_PERMISSIONS
PATTERN .clang-format EXCLUDE)
if(UNIX)
install(DIRECTORY samples/
DESTINATION ${IE_CPACK_IE_DIR}/samples/cpp
@@ -131,24 +143,30 @@ endif()
#
openvino_developer_export_targets(COMPONENT openvino_common TARGETS format_reader gflags ie_samples_utils)
openvino_developer_export_targets(COMPONENT ngraph TARGETS ${NGRAPH_LIBRARIES})
# for Template plugin
if(NGRAPH_INTERPRETER_ENABLE)
openvino_developer_export_targets(COMPONENT ngraph TARGETS ngraph_backend interpreter_backend)
endif()
ie_developer_export()
function(ie_generate_dev_package_config)
# dummy check that OpenCV is here
find_package(OpenCV QUIET)
configure_file(
"${IE_MAIN_SOURCE_DIR}/cmake/templates/InferenceEngineDeveloperPackageConfig.cmake.in"
"${CMAKE_BINARY_DIR}/InferenceEngineDeveloperPackageConfig.cmake"
@ONLY)
ie_developer_export()
configure_file(
"${IE_MAIN_SOURCE_DIR}/cmake/templates/InferenceEngineConfig-version.cmake.in"
"${CMAKE_BINARY_DIR}/InferenceEngineDeveloperPackageConfig-version.cmake"
@ONLY)
configure_package_config_file("${InferenceEngine_SOURCE_DIR}/cmake/templates/InferenceEngineDeveloperPackageConfig.cmake.in"
"${CMAKE_BINARY_DIR}/InferenceEngineDeveloperPackageConfig.cmake"
INSTALL_DESTINATION share # not used
PATH_VARS "OpenVINO_MAIN_SOURCE_DIR;IE_MAIN_SOURCE_DIR;gflags_BINARY_DIR"
NO_CHECK_REQUIRED_COMPONENTS_MACRO)
configure_file("${IE_MAIN_SOURCE_DIR}/cmake/templates/InferenceEngineConfig-version.cmake.in"
"${CMAKE_BINARY_DIR}/InferenceEngineDeveloperPackageConfig-version.cmake"
@ONLY)
endfunction()
ie_generate_dev_package_config()
#
# Coverage
@@ -163,6 +181,10 @@ endif()
#
function(register_extra_modules)
# post export
ie_developer_export_targets(inference_engine)
openvino_developer_export_targets(COMPONENT ngraph TARGETS ${NGRAPH_LIBRARIES})
set(InferenceEngineDeveloperPackage_DIR "${CMAKE_CURRENT_BINARY_DIR}/build-modules")
function(generate_fake_dev_package)

View File

@@ -66,8 +66,6 @@ ie_dependent_option (ENABLE_SPEECH_DEMO "enable speech demo integration" ON "NOT
ie_option (ENABLE_OPENCV "enables OpenCV" ON)
ie_option (ENABLE_PYTHON "enables ie python bridge build" OFF)
ie_option (ENABLE_V7_SERIALIZE "enables serialization to IR v7" OFF)
set(IE_EXTRA_MODULES "" CACHE STRING "Extra paths for extra modules to include into OpenVINO build")

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2018-2020 Intel Corporation
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#

View File

@@ -2,9 +2,13 @@
# SPDX-License-Identifier: Apache-2.0
#
@PACKAGE_INIT@
include(CMakeFindDependencyMacro)
# TODO: remove after changing [private plugins]
set(OpenVINO_MAIN_SOURCE_DIR "@OpenVINO_MAIN_SOURCE_DIR@") # KMB
set(IE_MAIN_SOURCE_DIR "@IE_MAIN_SOURCE_DIR@") # HDDL
set_and_check(OpenVINO_MAIN_SOURCE_DIR "@OpenVINO_MAIN_SOURCE_DIR@") # KMB
set_and_check(IE_MAIN_SOURCE_DIR "@IE_MAIN_SOURCE_DIR@") # HDDL
# Variables to export in plugin's projects
@@ -22,32 +26,46 @@ endforeach()
message("")
# for samples in 3rd party projects
set(gflags_DIR "@gflags_BINARY_DIR@")
set_and_check(gflags_DIR "@gflags_BINARY_DIR@")
# Targets
#
# Content
#
if(USE_SYSTEM_PUGIXML)
find_package(PugiXML REQUIRED)
set_property(TARGET pugixml PROPERTY IMPORTED_GLOBAL TRUE)
endif()
find_dependency(IEDevScripts
PATHS "${OpenVINO_MAIN_SOURCE_DIR}/cmake/developer_package"
NO_CMAKE_FIND_ROOT_PATH
NO_DEFAULT_PATH)
find_dependency(InferenceEngine
PATHS "${CMAKE_CURRENT_LIST_DIR}"
NO_CMAKE_FIND_ROOT_PATH
NO_DEFAULT_PATH)
# WA for cmake: it exports ngraph as IE::ngraph in the IE export list
# while we already have ngraph export in its own export list as ngraph::ngraph
set_property(TARGET ngraph::ngraph PROPERTY IMPORTED_GLOBAL TRUE)
add_library(IE::ngraph ALIAS ngraph::ngraph)
foreach(component @openvino_export_components@)
include("${CMAKE_CURRENT_LIST_DIR}/${component}_dev_targets.cmake")
endforeach()
set(InferenceEngine_LIBRARIES IE::inference_engine)
if(USE_SYSTEM_PUGIXML)
find_dependency(PugiXML)
set_property(TARGET pugixml PROPERTY IMPORTED_GLOBAL TRUE)
endif()
# inherit OpenCV from main IE project if enabled
if ("@OpenCV_FOUND@")
load_cache("${cache_path}" READ_WITH_PREFIX "" OpenCV_DIR)
find_dependency(OpenCV)
endif()
#
# Common cmake includes
# Extra Compile Flags
#
# Inference Engine Developer Scripts package
find_package(IEDevScripts REQUIRED
PATHS "@OpenVINO_MAIN_SOURCE_DIR@/cmake/developer_package"
NO_CMAKE_FIND_ROOT_PATH
NO_DEFAULT_PATH)
if(NOT MSVC)
ie_add_compiler_flags(-Wno-error=unused-variable)
if(CMAKE_COMPILER_IS_GNUCXX)
@@ -57,15 +75,3 @@ endif()
# Don't treat deprecated API warnings as errors in 3rd party apps
ie_deprecated_no_errors()
# inherit OpenCV from main IE project if enabled
if (ENABLE_OPENCV)
load_cache("${cache_path}" READ_WITH_PREFIX "" OpenCV_DIR)
find_package(OpenCV)
endif()
# inherit TBB from main IE project if enabled
if (THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO")
load_cache("${cache_path}" READ_WITH_PREFIX "" TBB_DIR)
find_package(TBB)
endif()

View File

@@ -8,18 +8,6 @@ cmake_minimum_required (VERSION 3.13)
# Set the project name
project (ie_python_api)
option(ENABLE_CONDA_FOLDER "Create output folder with conda python bindings" OFF)
option(ENABLE_WHEEL "Create wheel package" OFF)
set(PYTHON_BRIDGE_CPACK_PATH "python")
string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} ARCH)
if(ARCH STREQUAL "x86_64" OR ARCH STREQUAL "amd64") # Windows detects Intel's 64-bit CPU as AMD64
set(ARCH intel64)
elseif(ARCH STREQUAL "i386")
set(ARCH ia32)
endif()
if(DEFINED IE_MAIN_SOURCE_DIR)
set(InferenceEngine_LIBRARIES inference_engine)
else()
@@ -27,6 +15,12 @@ else()
set(InferenceEngine_LIBRARIES IE::inference_engine)
endif()
option(ENABLE_CONDA_FOLDER "Create output folder with conda python bindings" OFF)
cmake_dependent_option(ENABLE_WHEEL "Create wheel package" OFF
"PYTHONINTERP_FOUND;NOT CMAKE_SOURCE_DIR STREQUAL ie_python_api_SOURCE_DIR" OFF)
set(PYTHON_BRIDGE_CPACK_PATH "python")
if(UNIX)
# cython generated files require public visibility. Force the required visibility.
set(CMAKE_CXX_VISIBILITY_PRESET default)
@@ -35,10 +29,17 @@ endif()
include (cmake/UseCython.cmake)
if(PYTHONINTERP_FOUND)
set(PYTHON_VERSION python${PYTHON_VERSION_MAJOR}.${PYTHON_VERSION_MINOR})
# Check Cython version
if(CYTHON_VERSION VERSION_LESS "0.29")
message(FATAL_ERROR "OpenVINO Python API needs at least Cython version 0.29, found version ${CYTHON_VERSION}")
else()
message(FATAL_ERROR "Python Interpretator was not found!")
message(STATUS "Found Cython version ${CYTHON_VERSION}")
endif()
if(PYTHONLIBS_VERSION_STRING MATCHES "^([0-9]+)\.([0-9]+).*")
set(PYTHON_VERSION python${CMAKE_MATCH_1}.${CMAKE_MATCH_2})
else()
message(FATAL_ERROR "Failed to extract python major.minor from ${PYTHONLIBS_VERSION_STRING}")
endif()
if(ENABLE_CONDA_FOLDER)
@@ -67,13 +68,6 @@ if(ENABLE_WHEEL)
add_subdirectory(wheel)
endif()
# Check Cython version
if(CYTHON_VERSION VERSION_LESS "0.29")
message(FATAL_ERROR "OpenVINO Python API needs at least Cython version 0.29, found version ${CYTHON_VERSION}")
else()
message(STATUS "Found Cython version ${CYTHON_VERSION}")
endif()
# install
ie_cpack_add_component(${PYTHON_VERSION})

View File

@@ -29,15 +29,16 @@
# See also UseCython.cmake
# Use the Cython executable that lives next to the Python executable
# if it is a local installation.
find_package( PythonInterp )
find_package(PythonInterp 3 QUIET)
if( PYTHONINTERP_FOUND )
get_filename_component( _python_path ${PYTHON_EXECUTABLE} PATH )
find_program( CYTHON_EXECUTABLE
find_host_program( CYTHON_EXECUTABLE
NAMES cython cython.bat cython3
HINTS ${_python_path} $ENV{HOME}/.local/bin
)
else()
find_program( CYTHON_EXECUTABLE
find_host_program( CYTHON_EXECUTABLE
NAMES cython cython.bat cython3
)
endif()

View File

@@ -13,10 +13,6 @@
#
# cython_add_module( <module_name> <src1> <src2> ... <srcN> )
#
# To create a standalone executable, the function
#
# cython_add_standalone_executable( <executable_name> [MAIN_MODULE src1] <src1> <src2> ... <srcN> )
#
# To avoid dependence on Python, set the PYTHON_LIBRARY cache variable to point
# to a static library. If a MAIN_MODULE source is specified,
# the "if __name__ == '__main__':" from that module is used as the C main() method
@@ -92,7 +88,7 @@ find_package( Cython REQUIRED
PATHS "${CMAKE_CURRENT_SOURCE_DIR}/cmake"
NO_CMAKE_FIND_ROOT_PATH
NO_DEFAULT_PATH )
find_package( PythonLibs REQUIRED )
find_package(PythonLibs 3 REQUIRED)
set( CYTHON_CXX_EXTENSION "cxx" )
set( CYTHON_C_EXTENSION "c" )
@@ -239,9 +235,7 @@ function( compile_pyx _name generated_file )
set( cython_debug_arg "--gdb" )
endif()
if( "${PYTHONLIBS_VERSION_STRING}" MATCHES "^2." )
set( version_arg "-2" )
elseif( "${PYTHONLIBS_VERSION_STRING}" MATCHES "^3." )
if( "${PYTHONLIBS_VERSION_STRING}" MATCHES "^3." )
set( version_arg "-3" )
else()
set( version_arg )
@@ -292,48 +286,12 @@ function( cython_add_module _name )
endif()
endforeach()
compile_pyx( ${_name} generated_file ${pyx_module_sources} )
include_directories( ${PYTHON_INCLUDE_DIRS} )
python_add_module( ${_name} ${generated_file} ${other_module_sources} )
python_add_module ( ${_name} ${generated_file} ${other_module_sources} )
target_include_directories( ${_name} PRIVATE ${PYTHON_INCLUDE_DIRS})
# set_target_properties(${_name} PROPERTIES PREFIX "" SUFFIX "${PYTHON_MODULE_EXTENSION}")
if( APPLE )
set_target_properties( ${_name} PROPERTIES LINK_FLAGS "-undefined dynamic_lookup" )
else()
target_link_libraries( ${_name} PRIVATE ${PYTHON_LIBRARIES} )
endif()
endfunction()
include( CMakeParseArguments )
# cython_add_standalone_executable( _name [MAIN_MODULE src3.py] src1 src2 ... srcN )
# Creates a standalone executable the given sources.
function( cython_add_standalone_executable _name )
set( pyx_module_sources "" )
set( other_module_sources "" )
set( main_module "" )
cmake_parse_arguments( cython_arguments "" "MAIN_MODULE" "" ${ARGN} )
include_directories( ${PYTHON_INCLUDE_DIRS} )
foreach( _file ${cython_arguments_UNPARSED_ARGUMENTS} )
if( ${_file} MATCHES ".*\\.py[x]?$" )
get_filename_component( _file_we ${_file} NAME_WE )
if( "${_file_we}" STREQUAL "${_name}" )
set( main_module "${_file}" )
elseif( NOT "${_file}" STREQUAL "${cython_arguments_MAIN_MODULE}" )
set( PYTHON_MODULE_${_file_we}_static_BUILD_SHARED OFF )
compile_pyx( "${_file_we}_static" generated_file "${_file}" )
list( APPEND pyx_module_sources "${generated_file}" )
endif()
else()
list( APPEND other_module_sources ${_file} )
endif()
endforeach()
if( cython_arguments_MAIN_MODULE )
set( main_module ${cython_arguments_MAIN_MODULE} )
endif()
if( NOT main_module )
message( FATAL_ERROR "main module not found." )
endif()
get_filename_component( main_module_we "${main_module}" NAME_WE )
set( CYTHON_FLAGS ${CYTHON_FLAGS} --embed )
compile_pyx( "${main_module_we}_static" generated_file ${main_module} )
add_executable( ${_name} ${generated_file} ${pyx_module_sources} ${other_module_sources} )
target_link_libraries( ${_name} PRIVATE ${PYTHON_LIBRARIES} ${pyx_module_libs} )
endfunction()

View File

@@ -1 +1 @@
numpy~=1.19.5
numpy>=1.16.6,<1.20
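As an aside, the widened pin accepts any NumPy release from 1.16.6 up to, but excluding, 1.20. A minimal sketch for checking an installed version against that range (assuming the `packaging` helper library is available):

```python
# Sketch: confirm the installed NumPy satisfies the requirement pinned above.
from packaging.specifiers import SpecifierSet
import numpy

spec = SpecifierSet(">=1.16.6,<1.20")
assert numpy.__version__ in spec, f"unsupported numpy {numpy.__version__}"
```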

View File

@@ -1,2 +1,2 @@
opencv-python==4.5.*
numpy~=1.19.5
numpy>=1.16.6,<1.20

View File

@@ -9,29 +9,29 @@ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}/inference_e
set(CMAKE_COMPILE_PDB_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}/inference_engine)
set(CMAKE_PDB_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}/inference_engine)
file(GLOB SOURCE
${CMAKE_CURRENT_SOURCE_DIR}/ie_api.pyx
${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/*.hpp)
file(GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/ie_api.pyx
${CMAKE_CURRENT_SOURCE_DIR}/*.pxd
${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/*.hpp)
set_source_files_properties(${SOURCE} PROPERTIES CYTHON_IS_CXX ON)
file(GLOB PYX_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.pyx)
set_source_files_properties(${PYX_SOURCES} PROPERTIES CYTHON_IS_CXX ON)
# create target
cython_add_module(${TARGET_NAME} ${SOURCE})
cython_add_module(${TARGET_NAME} ${SOURCES})
set(INSTALLED_TARGETS ${TARGET_NAME})
file(GLOB OTHER_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.pyx)
list(REMOVE_ITEM OTHER_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/ie_api.pyx")
list(REMOVE_ITEM PYX_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/ie_api.pyx")
foreach(PYX_FILE ${OTHER_SOURCES})
foreach(PYX_FILE IN LISTS PYX_SOURCES)
get_filename_component(PYX_NAME "${PYX_FILE}" NAME_WE)
set_source_files_properties(${PYX_FILE} PROPERTIES CYTHON_IS_CXX ON)
cython_add_module(${PYX_NAME} ${PYX_FILE})
add_dependencies(${TARGET_NAME} ${PYX_NAME})
target_include_directories(${PYX_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
target_link_libraries(${PYX_NAME} PRIVATE ${InferenceEngine_LIBRARIES})
list(APPEND INSTALLED_TARGETS ${PYX_NAME})
ie_python_minimal_api(${PYX_NAME})
endforeach()
if(COMMAND ie_add_vs_version_file)
@@ -48,6 +48,7 @@ function(python_disable_deprecated_warnings)
endfunction()
python_disable_deprecated_warnings()
ie_python_minimal_api(${TARGET_NAME})
target_include_directories(${TARGET_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
target_link_libraries(${TARGET_NAME} PRIVATE ${InferenceEngine_LIBRARIES})
@@ -60,7 +61,7 @@ endif()
# perform copy
add_custom_command(TARGET ${TARGET_NAME}
POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${PYTHON_BRIDGE_SRC_ROOT}/src/openvino/inference_engine/__init__.py ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/__init__.py
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/__init__.py ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/__init__.py
COMMAND ${CMAKE_COMMAND} -E copy ${PYTHON_BRIDGE_SRC_ROOT}/requirements.txt ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/../../requirements.txt
COMMAND ${CMAKE_COMMAND} -E copy ${PYTHON_BRIDGE_SRC_ROOT}/requirements.txt ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}/../../requirements.txt
COMMAND ${CMAKE_COMMAND} -E copy ${PYTHON_BRIDGE_SRC_ROOT}/src/openvino/__init__.py ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/../__init__.py

View File

@@ -9,24 +9,22 @@ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}/offline_tra
set(CMAKE_COMPILE_PDB_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}/offline_transformations)
set(CMAKE_PDB_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}/offline_transformations)
file(GLOB SOURCE
${CMAKE_CURRENT_SOURCE_DIR}/offline_transformations_api.pyx
${CMAKE_CURRENT_SOURCE_DIR}/offline_transformations_api_impl.cpp
${CMAKE_CURRENT_SOURCE_DIR}/*.hpp)
set(SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/offline_transformations_api_impl_defs.pxd
${CMAKE_CURRENT_SOURCE_DIR}/offline_transformations_api.pyx
${CMAKE_CURRENT_SOURCE_DIR}/offline_transformations_api_impl.hpp
${CMAKE_CURRENT_SOURCE_DIR}/offline_transformations_api_impl.cpp)
set_source_files_properties(${SOURCE} PROPERTIES CYTHON_IS_CXX ON)
set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/offline_transformations_api.pyx
PROPERTIES CYTHON_IS_CXX ON)
# create target
cython_add_module(${TARGET_NAME} ${SOURCE})
set(INSTALLED_TARGETS ${TARGET_NAME})
cython_add_module(${TARGET_NAME} ${SOURCES})
add_dependencies(${TARGET_NAME} ie_api)
if(COMMAND ie_add_vs_version_file)
foreach(target IN LISTS INSTALLED_TARGETS)
ie_add_vs_version_file(NAME ${target}
FILEDESCRIPTION "Offline Transformatoins Python library")
endforeach()
ie_add_vs_version_file(NAME ${TARGET_NAME}
FILEDESCRIPTION "Offline Transformatoins Python library")
endif()
if(TARGET offline_transformations)
@@ -44,6 +42,8 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
target_compile_options(${TARGET_NAME} PRIVATE "-Wno-error=register")
endif()
add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME})
# perform copy
add_custom_command(TARGET ${TARGET_NAME}
POST_BUILD
@@ -52,12 +52,13 @@ add_custom_command(TARGET ${TARGET_NAME}
# install
install(TARGETS ${INSTALLED_TARGETS}
# TODO: use ${PYTHON_VERSION}_dev component below
# ie_cpack_add_component(${PYTHON_VERSION}_dev DEPENDS ${PYTHON_VERSION})
install(TARGETS ${TARGET_NAME}
RUNTIME DESTINATION python/${PYTHON_VERSION}/openvino/offline_transformations COMPONENT ${PYTHON_VERSION}
LIBRARY DESTINATION python/${PYTHON_VERSION}/openvino/offline_transformations COMPONENT ${PYTHON_VERSION})
install(PROGRAMS __init__.py
DESTINATION python/${PYTHON_VERSION}/openvino/offline_transformations
COMPONENT ${PYTHON_VERSION})
add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME})

View File

@@ -17,8 +17,8 @@ def ApplyPOTTransformations(IENetwork network, string device):
C.ApplyPOTTransformations(network.impl, device)
def ApplyLowLatencyTransformation(IENetwork network, int64_t num_iterations=1):
C.ApplyLowLatencyTransformation(network.impl, num_iterations)
def ApplyLowLatencyTransformation(IENetwork network, bool use_const_initializer = True):
C.ApplyLowLatencyTransformation(network.impl, use_const_initializer)
def ApplyPruningTransformation(IENetwork network):
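For context, a minimal usage sketch of the changed signature; the import path follows this branch's package layout and the model file names are placeholders:

```python
# Hypothetical usage of the updated low-latency transformation; the
# num_iterations argument is gone, and the boolean now controls whether
# ReadValue operations get Constant initializers.
from openvino.inference_engine import IECore
from openvino.offline_transformations import ApplyLowLatencyTransformation

ie = IECore()
net = ie.read_network(model="model.xml", weights="model.bin")  # placeholder paths
ApplyLowLatencyTransformation(net, use_const_initializer=True)
```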

View File

@@ -26,16 +26,9 @@ void InferenceEnginePython::ApplyPOTTransformations(InferenceEnginePython::IENet
manager.run_passes(network.actual->getFunction());
}
void InferenceEnginePython::ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, int64_t num_iterations) {
void InferenceEnginePython::ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, bool use_const_initializer) {
ngraph::pass::Manager manager;
// TODO: pass num_iterations to LowLatency
manager.register_pass<ngraph::pass::LowLatency>();
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
auto pass_config = manager.get_pass_config();
pass_config->set_callback<ngraph::pass::UnrollTensorIterator>([](const std::shared_ptr<const ngraph::Node>& node) -> bool {
return node->get_rt_info().count("UNROLL_TI") == 0;
});
manager.register_pass<ngraph::pass::LowLatency2>(use_const_initializer);
manager.run_passes(network.actual->getFunction());
}

View File

@@ -15,7 +15,7 @@ void ApplyMOCTransformations(InferenceEnginePython::IENetwork network, bool cf);
void ApplyPOTTransformations(InferenceEnginePython::IENetwork network, std::string device);
void ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, int64_t num_iterations);
void ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, bool use_const_initializer = true);
void ApplyPruningTransformation(InferenceEnginePython::IENetwork network);

View File

@@ -3,7 +3,6 @@
from libcpp cimport bool
from libcpp.string cimport string
from libc.stdint cimport int64_t
from ..inference_engine.ie_api_impl_defs cimport IENetwork
@@ -12,10 +11,10 @@ cdef extern from "offline_transformations_api_impl.hpp" namespace "InferenceEngi
cdef void ApplyPOTTransformations(IENetwork network, string device)
cdef void ApplyLowLatencyTransformation(IENetwork network, int64_t num_iterations)
cdef void ApplyLowLatencyTransformation(IENetwork network, bool use_const_initializer)
cdef void ApplyPruningTransformation(IENetwork network)
cdef void GenerateMappingFile(IENetwork network, string path, bool extract_names)
cdef void CheckAPI()
cdef void CheckAPI()

View File

@@ -9,24 +9,22 @@ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}/test_utils)
set(CMAKE_COMPILE_PDB_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}/test_utils)
set(CMAKE_PDB_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}/test_utils)
file(GLOB SOURCE
${CMAKE_CURRENT_SOURCE_DIR}/test_utils_api.pyx
${CMAKE_CURRENT_SOURCE_DIR}/test_utils_api_impl.cpp
${CMAKE_CURRENT_SOURCE_DIR}/*.hpp)
file(GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/test_utils_api.pyx
${CMAKE_CURRENT_SOURCE_DIR}/test_utils_api_impl.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_utils_api_impl.hpp
${CMAKE_CURRENT_SOURCE_DIR}/test_utils_api_impl_defs.pxd)
set_source_files_properties(${SOURCE} PROPERTIES CYTHON_IS_CXX ON)
set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/test_utils_api.pyx
PROPERTIES CYTHON_IS_CXX ON)
# create target
cython_add_module(${TARGET_NAME} ${SOURCE})
set(INSTALLED_TARGETS ${TARGET_NAME})
cython_add_module(${TARGET_NAME} ${SOURCES})
add_dependencies(${TARGET_NAME} ie_api)
if(COMMAND ie_add_vs_version_file)
foreach(target IN LISTS INSTALLED_TARGETS)
ie_add_vs_version_file(NAME ${target}
FILEDESCRIPTION "Test Utils Python library")
endforeach()
ie_add_vs_version_file(NAME ${TARGET_NAME}
FILEDESCRIPTION "Test Utils Python library")
endif()
if(TARGET commonTestUtils)

View File

@@ -49,4 +49,4 @@ def test_pruning_transformations():
f = ng.function_from_cnn(net)
assert f is not None
assert len(f.get_ops()) == 3
assert len(f.get_ops()) == 3

View File

@@ -1,16 +1,16 @@
WHEEL_PACKAGE_NAME=${WHEEL_PACKAGE_NAME}
WHEEL_VERSION=${WHEEL_VERSION}
WHEEL_LICENCE_TYPE=${WHEEL_LICENCE_TYPE}
WHEEL_AUTHOR=${WHEEL_AUTHOR}
WHEEL_AUTHOR_EMAIL=${WHEEL_AUTHOR_EMAIL}
WHEEL_DESC=${WHEEL_DESC}
WHEEL_LICENSE=${WHEEL_LICENSE}
WHEEL_REQUIREMENTS=${WHEEL_REQUIREMENTS}
WHEEL_OVERVIEW=${WHEEL_OVERVIEW}
WHEEL_PACKAGE_NAME=@WHEEL_PACKAGE_NAME@
WHEEL_VERSION=@WHEEL_VERSION@
WHEEL_LICENCE_TYPE=@WHEEL_LICENCE_TYPE@
WHEEL_AUTHOR=@WHEEL_AUTHOR@
WHEEL_AUTHOR_EMAIL=@WHEEL_AUTHOR_EMAIL@
WHEEL_DESC=@WHEEL_DESC@
WHEEL_LICENSE=@WHEEL_LICENSE@
WHEEL_REQUIREMENTS=@WHEEL_REQUIREMENTS@
WHEEL_OVERVIEW=@WHEEL_OVERVIEW@
CMAKE_BUILD_DIR=${CMAKE_BINARY_DIR}
CORE_LIBS_DIR=${IE_CPACK_RUNTIME_PATH}
PLUGINS_LIBS_DIR=${PLUGINS_LIBS_DIR}
NGRAPH_LIBS_DIR=${NGRAPH_LIBS_DIR}
TBB_LIBS_DIR=${TBB_LIBS_DIR}
PY_PACKAGES_DIR=${PY_PACKAGES_DIR}
CMAKE_BUILD_DIR=@CMAKE_BINARY_DIR@
CORE_LIBS_DIR=@IE_CPACK_RUNTIME_PATH@
PLUGINS_LIBS_DIR=@PLUGINS_LIBS_DIR@
NGRAPH_LIBS_DIR=@NGRAPH_LIBS_DIR@
TBB_LIBS_DIR=@TBB_LIBS_DIR@
PY_PACKAGES_DIR=@PY_PACKAGES_DIR@
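The switch to `@VAR@` placeholders pairs with `@ONLY` in `configure_file`, which keeps CMake from also expanding `${...}` occurrences in the template. A sketch of how the generated key=value file might be consumed on the Python side (the helper name is illustrative, not the actual setup.py code):

```python
# Illustrative reader for the generated .env file (plain KEY=VALUE lines).
def load_env(path=".env"):
    values = {}
    with open(path) as env_file:
        for line in env_file:
            line = line.strip()
            if line and not line.startswith("#"):
                key, _, value = line.partition("=")
                values[key] = value
    return values
```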

View File

@@ -16,7 +16,7 @@ set(WHEEL_REQUIREMENTS "${CMAKE_CURRENT_SOURCE_DIR}/meta/openvino.requirements.t
set(WHEEL_OVERVIEW "${CMAKE_CURRENT_SOURCE_DIR}/meta/pypi_overview.md" CACHE STRING "Detailed description")
set(SETUP_PY "${CMAKE_CURRENT_SOURCE_DIR}/setup.py")
set(SETUP_ENV "${CMAKE_CURRENT_SOURCE_DIR}/.env.in")
set(SETUP_ENV "${CMAKE_CURRENT_SOURCE_DIR}/.env.in")
set(CORE_LIBS_DIR ${IE_CPACK_RUNTIME_PATH})
set(PLUGINS_LIBS_DIR ${IE_CPACK_RUNTIME_PATH})
@@ -24,7 +24,6 @@ set(NGRAPH_LIBS_DIR deployment_tools/ngraph/lib)
set(PY_PACKAGES_DIR ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION})
set(TBB_LIBS_DIR deployment_tools/inference_engine/external/tbb/lib)
if(APPLE)
set(WHEEL_PLATFORM macosx_10_15_x86_64)
elseif(UNIX)
@@ -36,28 +35,40 @@ else()
message(FATAL_ERROR "This platform is not supported")
endif()
configure_file(${SETUP_ENV} "${CMAKE_CURRENT_SOURCE_DIR}/.env")
configure_file(${SETUP_ENV} "${CMAKE_CURRENT_SOURCE_DIR}/.env" @ONLY)
add_custom_target(ie_wheel ALL DEPENDS ie_libraries ie_plugins ie_api)
add_custom_target(ie_wheel ALL DEPENDS ie_api offline_transformations_api)
if(TARGET _pyngraph)
add_dependencies(ie_wheel _pyngraph)
endif()
foreach(_target ie_libraries ie_plugins _pyngraph)
if(TARGET ${_target})
add_dependencies(ie_wheel ${_target})
endif()
endforeach()
if(LINUX)
find_host_program(patchelf_program NAMES patchelf)
find_host_program(patchelf_program
NAMES patchelf
DOC "Path to patchelf tool")
if(NOT patchelf_program)
message(FATAL_ERROR "patchelf is not found, which is needed to build ie_wheel")
endif()
endif()
add_custom_command(TARGET ie_wheel
PRE_BUILD
COMMAND ${CMAKE_COMMAND} -E rm -rf "${CMAKE_CURRENT_BINARY_DIR}/site-packages"
COMMAND ${PYTHON_EXECUTABLE} ${SETUP_PY} clean bdist_wheel
--dist-dir ${CMAKE_BINARY_DIR}/wheels
--build=${WHEEL_BUILD}
--plat-name=${WHEEL_PLATFORM}
POST_BUILD
COMMAND ${PYTHON_EXECUTABLE} ${SETUP_PY} bdist_wheel
--dist-dir ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}/wheels
--build=${WHEEL_BUILD}
--plat-name=${WHEEL_PLATFORM}
COMMAND ${CMAKE_COMMAND} -E rm "${CMAKE_CURRENT_SOURCE_DIR}/.env"
WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
COMMENT "Building Python wheel ${WHEEL_PACKAGE_NAME}"
VERBATIM
)
set_property(TARGET ie_wheel
APPEND
PROPERTY ADDITIONAL_CLEAN_FILES "${CMAKE_BINARY_DIR}/wheels"
)

View File

@@ -1,7 +1,7 @@
defusedxml>=0.7.1
scipy~=1.5.4
jstyleson~=0.0.2
numpy~=1.19.5
numpy>=1.16.6,<1.20
addict>=2.4.0
pandas~=1.1.5
hyperopt~=0.1.2

View File

@@ -1 +1 @@
numpy~=1.19.5
numpy>=1.16.6,<1.20

View File

@@ -8,9 +8,10 @@ import errno
import subprocess # nosec
import typing
from pathlib import Path
from shutil import copyfile
from shutil import copyfile, rmtree
from distutils.command.install import install
from distutils.command.build import build
from distutils.command.clean import clean
from distutils.errors import DistutilsSetupError
from distutils.file_util import copy_file
from distutils import log
@@ -160,6 +161,7 @@ class PrepareLibs(build_clib):
# additional blacklist filter, just to fix cmake install issues
blacklist = ['.lib', '.pdb', '_debug.dll', '_debug.dylib']
package_dir = os.path.join(get_package_dir(PY_INSTALL_CFG), WHEEL_LIBS_INSTALL_DIR)
for src_dir in src_dirs:
local_base_dir = Path(src_dir)
for file_path in local_base_dir.rglob('*'):
@@ -197,6 +199,22 @@ class CopyExt(build_ext):
copy_file(src, dst, verbose=self.verbose, dry_run=self.dry_run)
class CustomClean(clean):
"""Clean up staging directories"""
def clean(self, install_cfg):
for comp, comp_data in install_cfg.items():
install_prefix = comp_data.get('prefix')
self.announce(f'Cleaning {comp}: {install_prefix}', level=3)
if os.path.exists(install_prefix):
rmtree(install_prefix)
def run(self):
self.clean(LIB_INSTALL_CFG)
self.clean(PY_INSTALL_CFG)
clean.run(self)
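A short sketch of how the new command might be invoked manually; the earlier CMake custom command chained `clean bdist_wheel` in a single setup.py call, while the updated target removes the staging directory itself before building:

```python
# Sketch: drive the custom distutils commands registered in setup() below.
import subprocess
import sys

subprocess.run([sys.executable, "setup.py", "clean"], check=True)        # CustomClean
subprocess.run([sys.executable, "setup.py", "bdist_wheel"], check=True)  # build wheel
```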
def is_tool(name):
"""Check if the command-line tool is available"""
try:
@@ -330,6 +348,7 @@ package_license = config('WHEEL_LICENSE', '')
if os.path.exists(package_license):
copyfile(package_license, 'LICENSE')
packages = find_namespace_packages(','.join(get_dir_list(PY_INSTALL_CFG)))
package_data: typing.Dict[str, list] = {}
@@ -350,6 +369,7 @@ setup(
'install': CustomInstall,
'build_clib': PrepareLibs,
'build_ext': CopyExt,
'clean': CustomClean,
},
ext_modules=find_prebuilt_extensions(get_dir_list(PY_INSTALL_CFG)),
packages=packages,

View File

@@ -52,5 +52,41 @@ namespace InferenceEngine {
* @param network A network to apply LowLatency transformation
* *
*/
INFERENCE_ENGINE_DEPRECATED("This transformation will be removed in 2023.1. "
"Use InferenceEngine::lowLatency2 instead.")
INFERENCE_ENGINE_API_CPP(void) LowLatency(InferenceEngine::CNNNetwork& network);
/**
* @brief The transformation finds all TensorIterator/Loop layers in the network,
* processes all back edges that describe a connection between Result and Parameter
* of the TensorIterator/Loop bodies, and inserts ReadValue and Assign layers at the
* input and output corresponding to this back edge.
* Supported platforms: CPU, GNA.
*
* The example below describes the changes made by the transformation
* [] - TensorIterator body
* () - new layer
* BE - back-edge
*
* before applying the transformation:
* -> input1[BE_1 -> Parameter -> Layers ... -> Result -> BE_1 ]output1->
*
* after applying the transformation:
* ->(ReadValue)-> input1[BE_1 ->Parameter->Layers ...->Result->BE_1]output1 ->(Assign)
* \
* ->...
* After applying the transformation, the resulting network can be inferred
* step by step; the states are stored between inferences.
* @param network A network to apply LowLatency transformation
* @param use_const_initializer Changes the type of the initializing subgraph for ReadValue operations.
If "true", then the transformation inserts Constant before ReadValue operation.
If "false, then the transformation leaves existed initializing subgraph for ReadValue operation.
* Loop operation by a given number. Does not affect TensorIterators.
* *
*/
INFERENCE_ENGINE_API_CPP(void) lowLatency2(InferenceEngine::CNNNetwork& network,
bool use_const_initializer = true);
} // namespace InferenceEngine
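To illustrate the step-by-step inference the comment describes, a hedged Python sketch; the `query_state`/`reset` names follow the 2021.x Python API, and the input shape and blob name are placeholders:

```python
# Sketch: run a transformed stateful network frame by frame; ReadValue/Assign
# states persist across infer() calls until explicitly reset.
import numpy as np
from openvino.inference_engine import IECore

ie = IECore()
exec_net = ie.load_network(ie.read_network("model.xml", "model.bin"), "CPU")
request = exec_net.requests[0]

for state in request.query_state():      # clear states before a new sequence
    state.reset()

for frame in np.zeros((10, 1, 440), dtype=np.float32):   # placeholder utterance
    request.infer({"input": frame})                       # "input" is a placeholder name
```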

View File

@@ -129,6 +129,14 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/gflags")
add_gflags()
endif()
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/zlib")
add_subdirectory(thirdparty/zlib EXCLUDE_FROM_ALL)
endif()
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/cnpy")
add_subdirectory(thirdparty/cnpy EXCLUDE_FROM_ALL)
endif()
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
endif()

View File

@@ -10,4 +10,3 @@ ie_add_sample(NAME benchmark_app
HEADERS ${HDR}
DEPENDENCIES format_reader ie_samples_utils
OPENCV_DEPENDENCIES core)

View File

@@ -2,7 +2,11 @@
# SPDX-License-Identifier: Apache-2.0
#
file (GLOB SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
file (GLOB HDR ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp)
${CMAKE_CURRENT_SOURCE_DIR}/*.h)
ie_add_sample(NAME speech_sample
SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/main.cpp"
HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/speech_sample.hpp"
DEPENDENCIES ie_samples_utils)
SOURCES ${SRC}
HEADERS ${HDR}
DEPENDENCIES cnpy ie_samples_utils)

View File

@@ -2,7 +2,7 @@
This sample demonstrates how to execute an Asynchronous Inference of acoustic model based on Kaldi\* neural networks and speech feature vectors.
The sample works with Kaldi ARK files only, so it does not cover an end-to-end speech recognition scenario (speech to text), requiring additional preprocessing (feature extraction) to get a feature vector from a speech signal, as well as postprocessing (decoding) to produce text from scores.
The sample works with Kaldi ARK or Numpy* uncompressed NPZ files, so it does not cover an end-to-end speech recognition scenario (speech to text), requiring additional preprocessing (feature extraction) to get a feature vector from a speech signal, as well as postprocessing (decoding) to produce text from scores.
Automatic Speech Recognition C++ sample application demonstrates how to use the following Inference Engine C++ API in applications:
@@ -27,8 +27,8 @@ Basic Inference Engine API is covered by [Hello Classification C++ sample](../he
## How It Works
Upon the start-up, the application reads command line parameters and loads a Kaldi-trained neural network along with Kaldi ARK speech feature vector file to the Inference Engine plugin. Then it performs inference on all speech utterances stored in the input ARK file. Context-windowed speech frames are processed in batches of 1-8
frames according to the `-bs` parameter. Batching across utterances is not supported by this sample. When inference is done, the application creates an output ARK file. If the `-r` option is given, error
Upon start-up, the application reads command line parameters, loads the specified model and input data to the Inference Engine plugin, and performs synchronous inference on all speech utterances stored in the input file. Context-windowed speech frames are processed in batches of 1-8
frames according to the `-bs` parameter. Batching across utterances is not supported by this sample. When inference is done, the application creates an output file. If the `-r` option is given, error
statistics are provided for each speech utterance as shown above.
You can see the explicit description of
@@ -43,7 +43,7 @@ Several parameters control neural network quantization. The `-q` flag determines
Three modes are supported:
- *static* - The first
utterance in the input ARK file is scanned for dynamic range. The scale factor (floating point scalar multiplier) required to scale the maximum input value of the first utterance to 16384 (15 bits) is used
utterance in the input file is scanned for dynamic range. The scale factor (floating point scalar multiplier) required to scale the maximum input value of the first utterance to 16384 (15 bits) is used
for all subsequent inputs. The neural network is quantized to accommodate the scaled input dynamic range.
- *dynamic* - The user may specify a scale factor via the `-sf` flag that will be used for static quantization.
- *user-defined* - The scale factor for each input batch is computed
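A small sketch of the *static* mode computation described above, where 16384 corresponds to the sample's `MAX_VAL_2B_FEAT` constant:

```python
# Sketch: scale factor that maps the first utterance's peak value to 16384 (15 bits).
import numpy as np

def static_scale_factor(first_utterance: np.ndarray, target: float = 16384.0) -> float:
    peak = float(np.max(np.abs(first_utterance)))
    return target / peak if peak > 0 else 1.0
```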
@@ -99,17 +99,17 @@ speech_sample [OPTION]
Options:
-h Print a usage message.
-i "<path>" Required. Paths to .ark files. Example of usage: <file1.ark,file2.ark> or <file.ark>.
-i "<path>" Required. Paths to input files. Example of usage: <file1.ark,file2.ark> or <file.ark> or <file.npz>.
-m "<path>" Required. Path to an .xml file with a trained model (required if -rg is missing).
-o "<path>" Optional. Output file name to save ark scores.
-o "<path>" Optional. Output file name to save scores. Example of usage: <output.ark> or <output.npz>
-d "<device>" Optional. Specify a target device to infer on. CPU, GPU, MYRIAD, GNA_AUTO, GNA_HW, GNA_SW_FP32, GNA_SW_EXACT and HETERO with combination of GNA
as the primary device and CPU as a secondary (e.g. HETERO:GNA,CPU) are supported. The list of available devices is shown below. The sample will look for a suitable plugin for device specified.
-pc Optional. Enables per-layer performance report.
-q "<mode>" Optional. Input quantization mode: "static" (default), "dynamic", or "user" (use with -sf).
-q "<mode>" Optional. Input quantization mode: static (default), dynamic, or user (use with -sf).
-qb "<integer>" Optional. Weight bits for quantization: 8 or 16 (default)
-sf "<double>" Optional. User-specified input scale factor for quantization (use with -q user). If the network contains multiple inputs, provide scale factors by separating them with commas.
-bs "<integer>" Optional. Batch size 1-8 (default 1)
-r "<path>" Optional. Read reference score .ark file and compare scores.
-r "<path>" Optional. Read referefile and compare scores. Example of usage: <reference.ark> or <reference.npz>
-rg "<path>" Read GNA model from file using path/filename provided (required if -m is missing).
-wg "<path>" Optional. Write GNA model to file using path/filename provided.
-we "<path>" Optional. Write GNA embedded model to file using path/filename provided.
@@ -118,10 +118,9 @@ Options:
If you use the cw_l or cw_r flag, then batch size and nthreads arguments are ignored.
-cw_r "<integer>" Optional. Number of frames for right context windows (default is 0). Works only with context window networks.
If you use the cw_r or cw_l flag, then batch size and nthreads arguments are ignored.
-oname "<outputs>" Optional. Layer names for output blobs. The names are separated with ",". Allows to change the order of output layers for -o flag.
Example: Output1:port,Output2:port.
-iname "<inputs>" Optional. Layer names for input blobs. The names are separated with ",". Allows to change the order of input layers for -i flag.
Example: Input1,Input2
-oname "<string>" Optional. Layer names for output blobs. The names are separated with "," Example: Output1:port,Output2:port
-iname "<string>" Optional. Layer names for input blobs. The names are separated with "," Example: Input1,Input2
-pwl_me "<double>" Optional. The maximum percent of error for PWL function.The value must be in <0, 100> range. The default value is 1.0.
Available target devices: <devices>
@@ -168,7 +167,7 @@ All of them can be downloaded from [https://storage.openvinotoolkit.org/models_c
## Sample Output
The acoustic log likelihood sequences for all utterances are stored in the Kaldi ARK file, `scores.ark`. If the `-r` option is used, a report on the statistical score error is generated for each utterance such as
The acoustic log likelihood sequences for all utterances are stored in the output file, e.g. `scores.ark` or `scores.npz`. If the `-r` option is used, a report on the statistical score error is generated for each utterance such as
the following:
```sh

View File

@@ -0,0 +1,144 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "fileutils.hpp"
void ArkFile::GetFileInfo(const char* fileName, uint32_t numArrayToFindSize, uint32_t* ptrNumArrays, uint32_t* ptrNumMemoryBytes) {
uint32_t numArrays = 0;
uint32_t numMemoryBytes = 0;
std::ifstream in_file(fileName, std::ios::binary);
if (in_file.good()) {
while (!in_file.eof()) {
std::string line;
uint32_t numRows = 0u, numCols = 0u, num_bytes = 0u;
std::getline(in_file, line, '\0'); // read variable length name followed by space and NUL
std::getline(in_file, line, '\4'); // read "BFM" followed by space and control-D
if (line.compare("BFM ") != 0) {
break;
}
in_file.read(reinterpret_cast<char*>(&numRows), sizeof(uint32_t)); // read number of rows
std::getline(in_file, line, '\4'); // read control-D
in_file.read(reinterpret_cast<char*>(&numCols), sizeof(uint32_t)); // read number of columns
num_bytes = numRows * numCols * sizeof(float);
in_file.seekg(num_bytes, in_file.cur); // read data
if (numArrays == numArrayToFindSize) {
numMemoryBytes += num_bytes;
}
numArrays++;
}
in_file.close();
} else {
throw std::runtime_error(std::string("Failed to open %s for reading in GetFileInfo()!\n") + fileName);
}
if (ptrNumArrays != NULL)
*ptrNumArrays = numArrays;
if (ptrNumMemoryBytes != NULL)
*ptrNumMemoryBytes = numMemoryBytes;
}
void ArkFile::LoadFile(const char* fileName, uint32_t arrayIndex, std::string& ptrName, std::vector<uint8_t>& memory, uint32_t* ptrNumRows,
uint32_t* ptrNumColumns, uint32_t* ptrNumBytesPerElement) {
std::ifstream in_file(fileName, std::ios::binary);
if (in_file.good()) {
uint32_t i = 0;
while (i < arrayIndex) {
std::string line;
uint32_t numRows = 0u, numCols = 0u;
std::getline(in_file, line, '\0'); // read variable length name followed by space and NUL
std::getline(in_file, line, '\4'); // read "BFM" followed by space and control-D
if (line.compare("BFM ") != 0) {
break;
}
in_file.read(reinterpret_cast<char*>(&numRows), sizeof(uint32_t)); // read number of rows
std::getline(in_file, line, '\4'); // read control-D
in_file.read(reinterpret_cast<char*>(&numCols), sizeof(uint32_t)); // read number of columns
in_file.seekg(numRows * numCols * sizeof(float), in_file.cur); // read data
i++;
}
if (!in_file.eof()) {
std::string line;
std::getline(in_file, ptrName, '\0'); // read variable length name followed by space and NUL
std::getline(in_file, line, '\4'); // read "BFM" followed by space and control-D
if (line.compare("BFM ") != 0) {
throw std::runtime_error(std::string("Cannot find array specifier in file %s in LoadFile()!\n") + fileName);
}
in_file.read(reinterpret_cast<char*>(ptrNumRows), sizeof(uint32_t)); // read number of rows
std::getline(in_file, line, '\4'); // read control-D
in_file.read(reinterpret_cast<char*>(ptrNumColumns), sizeof(uint32_t)); // read number of columns
in_file.read(reinterpret_cast<char*>(&memory.front()),
*ptrNumRows * *ptrNumColumns * sizeof(float)); // read array data
}
in_file.close();
} else {
throw std::runtime_error(std::string("Failed to open %s for reading in LoadFile()!\n") + fileName);
}
*ptrNumBytesPerElement = sizeof(float);
}
void ArkFile::SaveFile(const char* fileName, bool shouldAppend, std::string name, void* ptrMemory, uint32_t numRows, uint32_t numColumns) {
std::ios_base::openmode mode = std::ios::binary;
if (shouldAppend) {
mode |= std::ios::app;
}
std::ofstream out_file(fileName, mode);
if (out_file.good()) {
out_file.write(name.c_str(), name.length()); // write name
out_file.write("\0", 1);
out_file.write("BFM ", 4);
out_file.write("\4", 1);
out_file.write(reinterpret_cast<char*>(&numRows), sizeof(uint32_t));
out_file.write("\4", 1);
out_file.write(reinterpret_cast<char*>(&numColumns), sizeof(uint32_t));
out_file.write(reinterpret_cast<char*>(ptrMemory), numRows * numColumns * sizeof(float));
out_file.close();
} else {
throw std::runtime_error(std::string("Failed to open %s for writing in SaveFile()!\n") + fileName);
}
}
void NumpyFile::GetFileInfo(const char* fileName, uint32_t numArrayToFindSize, uint32_t* ptrNumArrays, uint32_t* ptrNumMemoryBytes) {
uint32_t numArrays = 0;
uint32_t numMemoryBytes = 0;
cnpy::npz_t my_npz1 = cnpy::npz_load(fileName);
auto it = my_npz1.begin();
std::advance(it, numArrayToFindSize);
numArrays = my_npz1.size();
cnpy::NpyArray my_npy = it->second;
numMemoryBytes = my_npy.data_holder->size();
if (ptrNumArrays != NULL)
*ptrNumArrays = numArrays;
if (ptrNumMemoryBytes != NULL)
*ptrNumMemoryBytes = numMemoryBytes;
}
void NumpyFile::LoadFile(const char* fileName, uint32_t arrayIndex, std::string& ptrName, std::vector<uint8_t>& memory, uint32_t* ptrNumRows,
uint32_t* ptrNumColumns, uint32_t* ptrNumBytesPerElement) {
cnpy::npz_t my_npz1 = cnpy::npz_load(fileName);
auto it = my_npz1.begin();
std::advance(it, arrayIndex);
ptrName = it->first;
cnpy::NpyArray my_npy = it->second;
*ptrNumRows = my_npy.shape[0];
*ptrNumColumns = my_npy.shape[1];
for (size_t i = 0; i < my_npy.data_holder->size(); i++) {
memory.at(i) = my_npy.data_holder->at(i);
}
*ptrNumBytesPerElement = sizeof(float);
}
void NumpyFile::SaveFile(const char* fileName, bool shouldAppend, std::string name, void* ptrMemory, uint32_t numRows, uint32_t numColumns) {
std::string mode = shouldAppend ? "a" : "w";
std::vector<size_t> shape {numRows, numColumns};
cnpy::npz_save(fileName, name, reinterpret_cast<float*>(ptrMemory), shape, mode);
}
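For reference, the ARK record layout handled above is `<name>\0` + `"BFM "` + `\x04` + row count + `\x04` + column count + raw float32 data. A Python sketch of a compatible reader, derived from this C++ code rather than any official parser (native little-endian byte order assumed):

```python
# Sketch: parse the uncompressed "BFM" matrix records written by ArkFile::SaveFile.
import struct
import numpy as np

def read_until(f, terminator):
    chunk = bytearray()
    while (b := f.read(1)) and b != terminator:
        chunk.extend(b)
    return bytes(chunk)

def read_ark(path):
    arrays = {}
    with open(path, "rb") as f:
        while True:
            name = read_until(f, b"\0")
            if read_until(f, b"\x04") != b"BFM ":
                break                      # EOF or unknown record type
            rows = struct.unpack("<I", f.read(4))[0]
            f.read(1)                      # skip the \x04 separator
            cols = struct.unpack("<I", f.read(4))[0]
            data = np.frombuffer(f.read(rows * cols * 4), dtype=np.float32)
            arrays[name.decode()] = data.reshape(rows, cols)
    return arrays
```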

View File

@@ -0,0 +1,100 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <cnpy.h>
#include <samples/common.hpp>
#include <samples/slog.hpp>
/// @brief Interface for working with input and output files
class BaseFile {
public:
virtual void LoadFile(const char* fileName, uint32_t arrayIndex, std::string& ptrName, std::vector<uint8_t>& memory, uint32_t* ptrNumRows,
uint32_t* ptrNumColumns, uint32_t* ptrNumBytesPerElement) = 0;
virtual void SaveFile(const char* fileName, bool shouldAppend, std::string name, void* ptrMemory, uint32_t numRows, uint32_t numColumns) = 0;
virtual void GetFileInfo(const char* fileName, uint32_t numArrayToFindSize, uint32_t* ptrNumArrays, uint32_t* ptrNumMemoryBytes) = 0;
};
/// @brief Responsible for working with .ark files
class ArkFile : public BaseFile {
public:
/**
* @brief Get info from Kaldi ARK speech feature vector file
* @param fileName .ark file name
* @param numArrayToFindSize index of the speech feature vector whose size in bytes should be reported
* @param ptrNumArrays pointer that receives the total number of arrays in the file
* @param ptrNumMemoryBytes pointer that receives the size in bytes of the selected array
* @return none.
*/
virtual void GetFileInfo(const char* fileName, uint32_t numArrayToFindSize, uint32_t* ptrNumArrays, uint32_t* ptrNumMemoryBytes);
/**
* @brief Load Kaldi ARK speech feature vector file
* @param fileName .ark file name
* @param arrayIndex index of the speech feature vector in the file
* @param ptrName reference to variable length name
* @param memory reference to speech feature vector to save
* @param ptrNumRows pointer to number of rows to read
* @param ptrNumColumns pointer to number of columns to read
* @param ptrNumBytesPerElement pointer to number bytes per element (size of float by default)
* @return none.
*/
virtual void LoadFile(const char* fileName, uint32_t arrayIndex, std::string& ptrName, std::vector<uint8_t>& memory, uint32_t* ptrNumRows,
uint32_t* ptrNumColumns, uint32_t* ptrNumBytesPerElement);
/**
* @brief Save Kaldi ARK speech feature vector file
* @param fileName .ark file name
* @param shouldAppend bool flag to rewrite or add to the end of file
* @param name reference to variable length name
* @param ptrMemory pointer to speech feature vector to save
* @param numRows number of rows
* @param numColumns number of columns
* @return none.
*/
virtual void SaveFile(const char* fileName, bool shouldAppend, std::string name, void* ptrMemory, uint32_t numRows, uint32_t numColumns);
};
/// @brief Responsible for working with .npz files
class NumpyFile : public BaseFile {
public:
/**
* @brief Get info from Numpy* uncompressed NPZ speech feature vector file
* @param fileName .npz file name
* @param numArrayToFindSize index of the speech feature vector whose size in bytes should be reported
* @param ptrNumArrays pointer that receives the total number of arrays in the file
* @param ptrNumMemoryBytes pointer that receives the size in bytes of the selected array
* @return none.
*/
virtual void GetFileInfo(const char* fileName, uint32_t numArrayToFindSize, uint32_t* ptrNumArrays, uint32_t* ptrNumMemoryBytes);
/**
* @brief Load Numpy* uncompressed NPZ speech feature vector file
* @param fileName .npz file name
* @param arrayIndex index of the speech feature vector in the file
* @param ptrName reference to variable length name
* @param memory reference to speech feature vector to save
* @param ptrNumRows pointer to number of rows to read
* @param ptrNumColumns pointer to number of columns to read
* @param ptrNumBytesPerElement pointer to number bytes per element (size of float by default)
* @return none.
*/
virtual void LoadFile(const char* fileName, uint32_t arrayIndex, std::string& ptrName, std::vector<uint8_t>& memory, uint32_t* ptrNumRows,
uint32_t* ptrNumColumns, uint32_t* ptrNumBytesPerElement);
/**
* @brief Save Numpy* uncompressed NPZ speech feature vector file
* @param fileName .npz file name
* @param shouldAppend bool flag to rewrite or add to the end of file
* @param name reference to variable length name
* @param ptrMemory pointer to speech feature vector to save
* @param numRows number of rows
* @param numColumns number of columns
* @return none.
*/
virtual void SaveFile(const char* fileName, bool shouldAppend, std::string name, void* ptrMemory, uint32_t numRows, uint32_t numColumns);
};
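The NPZ side of the same interface maps onto plain NumPy calls; a sketch follows (note that, unlike cnpy, `np.savez` cannot append to an existing archive, so all arrays are written in one call):

```python
# Sketch: NumPy equivalents of NumpyFile::SaveFile / NumpyFile::LoadFile above.
import numpy as np

def save_npz(path, arrays):                  # arrays: {utterance_name: 2-D float32}
    np.savez(path, **arrays)                 # whole archive written at once

def load_npz(path, array_index):
    data = np.load(path)
    name = data.files[array_index]           # names in archive order
    arr = np.asarray(data[name], dtype=np.float32)
    return name, arr                         # arr.shape gives (rows, cols)
```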

View File

@@ -24,6 +24,7 @@
#include <utility>
#include <vector>
#include "fileutils.hpp"
#include "speech_sample.hpp"
#define MAX_SCORE_DIFFERENCE 0.0001f // max score difference for frame error threshold
@@ -63,144 +64,15 @@ struct InferRequestStruct {
/**
* @brief Check number of input files and model network inputs
* @param numInputs number model inputs
* @param numInputArkFiles number of input ARK files
* @param numInputFiles number of input files
* @return none.
*/
void CheckNumberOfInputs(size_t numInputs, size_t numInputArkFiles) {
if (numInputs != numInputArkFiles) {
void CheckNumberOfInputs(size_t numInputs, size_t numInputFiles) {
if (numInputs != numInputFiles) {
throw std::logic_error("Number of network inputs (" + std::to_string(numInputs) +
")"
" is not equal to number of ark files (" +
std::to_string(numInputArkFiles) + ")");
}
}
/**
* @brief Get info from Kaldi ARK speech feature vector file
* @param fileName .ark file name
* @param numArrayToFindSize number speech feature vectors in the file
* @param ptrNumArrays pointer to specific number array
* @param ptrNumMemoryBytes pointer to specific number of memory bytes
* @return none.
*/
void GetKaldiArkInfo(const char* fileName, uint32_t numArrayToFindSize, uint32_t* ptrNumArrays, uint32_t* ptrNumMemoryBytes) {
uint32_t numArrays = 0;
uint32_t numMemoryBytes = 0;
std::ifstream in_file(fileName, std::ios::binary);
if (in_file.good()) {
while (!in_file.eof()) {
std::string line;
uint32_t numRows = 0u, numCols = 0u, num_bytes = 0u;
std::getline(in_file, line, '\0'); // read variable length name followed by space and NUL
std::getline(in_file, line, '\4'); // read "BFM" followed by space and control-D
if (line.compare("BFM ") != 0) {
break;
}
in_file.read(reinterpret_cast<char*>(&numRows), sizeof(uint32_t)); // read number of rows
std::getline(in_file, line, '\4'); // read control-D
in_file.read(reinterpret_cast<char*>(&numCols), sizeof(uint32_t)); // read number of columns
num_bytes = numRows * numCols * sizeof(float);
in_file.seekg(num_bytes, in_file.cur); // read data
if (numArrays == numArrayToFindSize) {
numMemoryBytes += num_bytes;
}
numArrays++;
}
in_file.close();
} else {
fprintf(stderr, "Failed to open %s for reading in GetKaldiArkInfo()!\n", fileName);
exit(-1);
}
if (ptrNumArrays != NULL)
*ptrNumArrays = numArrays;
if (ptrNumMemoryBytes != NULL)
*ptrNumMemoryBytes = numMemoryBytes;
}
/**
* @brief Load Kaldi ARK speech feature vector file
* @param fileName .ark file name
* @param arrayIndex number speech feature vector in the file
* @param ptrName reference to variable length name
* @param memory reference to speech feature vector to save
* @param ptrNumRows pointer to number of rows to read
* @param ptrNumColumns pointer to number of columns to read
* @param ptrNumBytesPerElement pointer to number bytes per element (size of float by default)
* @return none.
*/
void LoadKaldiArkArray(const char* fileName, uint32_t arrayIndex, std::string& ptrName, std::vector<uint8_t>& memory, uint32_t* ptrNumRows,
uint32_t* ptrNumColumns, uint32_t* ptrNumBytesPerElement) {
std::ifstream in_file(fileName, std::ios::binary);
if (in_file.good()) {
uint32_t i = 0;
while (i < arrayIndex) {
std::string line;
uint32_t numRows = 0u, numCols = 0u;
std::getline(in_file, line, '\0'); // read variable length name followed by space and NUL
std::getline(in_file, line, '\4'); // read "BFM" followed by space and control-D
if (line.compare("BFM ") != 0) {
break;
}
in_file.read(reinterpret_cast<char*>(&numRows), sizeof(uint32_t)); // read number of rows
std::getline(in_file, line, '\4'); // read control-D
in_file.read(reinterpret_cast<char*>(&numCols), sizeof(uint32_t)); // read number of columns
in_file.seekg(numRows * numCols * sizeof(float), in_file.cur); // read data
i++;
}
if (!in_file.eof()) {
std::string line;
std::getline(in_file, ptrName, '\0'); // read variable length name followed by space and NUL
std::getline(in_file, line, '\4'); // read "BFM" followed by space and control-D
if (line.compare("BFM ") != 0) {
fprintf(stderr, "Cannot find array specifier in file %s in LoadKaldiArkArray()!\n", fileName);
exit(-1);
}
in_file.read(reinterpret_cast<char*>(ptrNumRows), sizeof(uint32_t)); // read number of rows
std::getline(in_file, line, '\4'); // read control-D
in_file.read(reinterpret_cast<char*>(ptrNumColumns), sizeof(uint32_t)); // read number of columns
in_file.read(reinterpret_cast<char*>(&memory.front()),
*ptrNumRows * *ptrNumColumns * sizeof(float)); // read array data
}
in_file.close();
} else {
fprintf(stderr, "Failed to open %s for reading in LoadKaldiArkArray()!\n", fileName);
exit(-1);
}
*ptrNumBytesPerElement = sizeof(float);
}
/**
* @brief Save Kaldi ARK speech feature vector file
* @param fileName .ark file name
* @param shouldAppend bool flag to rewrite or add to the end of file
* @param name reference to variable length name
* @param ptrMemory pointer to speech feature vector to save
* @param numRows number of rows
* @param numColumns number of columns
* @return none.
*/
void SaveKaldiArkArray(const char* fileName, bool shouldAppend, std::string name, void* ptrMemory, uint32_t numRows, uint32_t numColumns) {
std::ios_base::openmode mode = std::ios::binary;
if (shouldAppend) {
mode |= std::ios::app;
}
std::ofstream out_file(fileName, mode);
if (out_file.good()) {
out_file.write(name.c_str(), name.length()); // write name
out_file.write("\0", 1);
out_file.write("BFM ", 4);
out_file.write("\4", 1);
out_file.write(reinterpret_cast<char*>(&numRows), sizeof(uint32_t));
out_file.write("\4", 1);
out_file.write(reinterpret_cast<char*>(&numColumns), sizeof(uint32_t));
out_file.write(reinterpret_cast<char*>(ptrMemory), numRows * numColumns * sizeof(float));
out_file.close();
} else {
throw std::runtime_error(std::string("Failed to open %s for writing in SaveKaldiArkArray()!\n") + fileName);
" is not equal to number of input files (" +
std::to_string(numInputFiles) + ")");
}
}
@@ -637,7 +509,20 @@ int main(int argc, char* argv[]) {
return 0;
}
std::vector<std::string> inputArkFiles;
BaseFile* file;
BaseFile* fileOutput;
ArkFile arkFile;
NumpyFile numpyFile;
auto extInputFile = fileExt(FLAGS_i);
if (extInputFile == "ark") {
file = &arkFile;
} else if (extInputFile == "npz") {
file = &numpyFile;
} else {
throw std::logic_error("Invalid input file");
}
std::vector<std::string> inputFiles;
std::vector<uint32_t> numBytesThisUtterance;
uint32_t numUtterances(0);
if (!FLAGS_i.empty()) {
@@ -646,19 +531,19 @@ int main(int argc, char* argv[]) {
uint32_t currentNumUtterances(0), currentNumBytesThisUtterance(0);
while (getline(stream, outStr, ',')) {
std::string filename(fileNameNoExt(outStr) + ".ark");
inputArkFiles.push_back(filename);
std::string filename(fileNameNoExt(outStr) + "." + extInputFile);
inputFiles.push_back(filename);
GetKaldiArkInfo(filename.c_str(), 0, &currentNumUtterances, &currentNumBytesThisUtterance);
file->GetFileInfo(filename.c_str(), 0, &currentNumUtterances, &currentNumBytesThisUtterance);
if (numUtterances == 0) {
numUtterances = currentNumUtterances;
} else if (currentNumUtterances != numUtterances) {
throw std::logic_error("Incorrect input files. Number of utterance must be the same for all ark files");
throw std::logic_error("Incorrect input files. Number of utterance must be the same for all input files");
}
numBytesThisUtterance.push_back(currentNumBytesThisUtterance);
}
}
size_t numInputArkFiles(inputArkFiles.size());
size_t numInputFiles(inputFiles.size());
// -----------------------------------------------------------------------------------------------------
// --------------------------- Step 1. Initialize inference engine core -------------------------------------
@@ -689,7 +574,7 @@ int main(int argc, char* argv[]) {
if (!FLAGS_m.empty()) {
/** Read network model **/
network = ie.ReadNetwork(FLAGS_m);
CheckNumberOfInputs(network.getInputsInfo().size(), numInputArkFiles);
CheckNumberOfInputs(network.getInputsInfo().size(), numInputFiles);
// -------------------------------------------------------------------------------------------------
// --------------------------- Set batch size ---------------------------------------------------
@@ -718,9 +603,9 @@ int main(int argc, char* argv[]) {
slog::warn << "Custom scale factor will be ignored - using scale factor from provided imported gna model: " << FLAGS_rg << slog::endl;
} else {
auto scaleFactorInput = ParseScaleFactors(FLAGS_sf);
if (numInputArkFiles != scaleFactorInput.size()) {
if (numInputFiles != scaleFactorInput.size()) {
std::string errMessage("Incorrect command line for multiple inputs: " + std::to_string(scaleFactorInput.size()) +
" scale factors provided for " + std::to_string(numInputArkFiles) + " input files.");
" scale factors provided for " + std::to_string(numInputFiles) + " input files.");
throw std::logic_error(errMessage);
}
@@ -735,14 +620,14 @@ int main(int argc, char* argv[]) {
if (!FLAGS_rg.empty()) {
slog::info << "Using scale factor from provided imported gna model: " << FLAGS_rg << slog::endl;
} else {
for (size_t i = 0; i < numInputArkFiles; i++) {
auto inputArkName = inputArkFiles[i].c_str();
for (size_t i = 0; i < numInputFiles; i++) {
auto inputFileName = inputFiles[i].c_str();
std::string name;
std::vector<uint8_t> ptrFeatures;
uint32_t numArrays(0), numBytes(0), numFrames(0), numFrameElements(0), numBytesPerElement(0);
GetKaldiArkInfo(inputArkName, 0, &numArrays, &numBytes);
file->GetFileInfo(inputFileName, 0, &numArrays, &numBytes);
ptrFeatures.resize(numBytes);
LoadKaldiArkArray(inputArkName, 0, name, ptrFeatures, &numFrames, &numFrameElements, &numBytesPerElement);
file->LoadFile(inputFileName, 0, name, ptrFeatures, &numFrames, &numFrameElements, &numBytesPerElement);
auto floatScaleFactor = ScaleFactorForQuantization(ptrFeatures.data(), MAX_VAL_2B_FEAT, numFrames * numFrameElements);
slog::info << "Using scale factor of " << floatScaleFactor << " calculated from first utterance." << slog::endl;
std::string scaleFactorConfigKey = GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_") + std::to_string(i);
@@ -840,7 +725,7 @@ int main(int argc, char* argv[]) {
// --------------------------- Prepare input blobs -----------------------------------------------------
/** Taking information about all topology inputs **/
ConstInputsDataMap cInputInfo = executableNet.GetInputsInfo();
CheckNumberOfInputs(cInputInfo.size(), numInputArkFiles);
CheckNumberOfInputs(cInputInfo.size(), numInputFiles);
/** Stores all input blobs data **/
std::vector<Blob::Ptr> ptrInputBlobs;
@@ -934,7 +819,7 @@ int main(int argc, char* argv[]) {
std::vector<uint8_t> ptrReferenceScores;
score_error_t frameError, totalError;
ptrUtterances.resize(inputArkFiles.size());
ptrUtterances.resize(inputFiles.size());
// initialize memory state before starting
for (auto&& state : inferRequests.begin()->inferRequest.QueryState()) {
@@ -954,20 +839,20 @@ int main(int argc, char* argv[]) {
slog::info << "Number scores per frame : " << numScoresPerFrame << slog::endl;
/** Get information from ark file for current utterance **/
numFrameElementsInput.resize(numInputArkFiles);
for (size_t i = 0; i < inputArkFiles.size(); i++) {
/** Get information from input file for current utterance **/
numFrameElementsInput.resize(numInputFiles);
for (size_t i = 0; i < inputFiles.size(); i++) {
std::vector<uint8_t> ptrUtterance;
auto inputArkFilename = inputArkFiles[i].c_str();
auto inputFilename = inputFiles[i].c_str();
uint32_t currentNumFrames(0), currentNumFrameElementsInput(0), currentNumBytesPerElementInput(0);
GetKaldiArkInfo(inputArkFilename, utteranceIndex, &n, &numBytesThisUtterance[i]);
file->GetFileInfo(inputFilename, utteranceIndex, &n, &numBytesThisUtterance[i]);
ptrUtterance.resize(numBytesThisUtterance[i]);
LoadKaldiArkArray(inputArkFilename, utteranceIndex, uttName, ptrUtterance, &currentNumFrames, &currentNumFrameElementsInput,
&currentNumBytesPerElementInput);
file->LoadFile(inputFilename, utteranceIndex, uttName, ptrUtterance, &currentNumFrames, &currentNumFrameElementsInput,
&currentNumBytesPerElementInput);
if (numFrames == 0) {
numFrames = currentNumFrames;
} else if (numFrames != currentNumFrames) {
std::string errMessage("Number of frames in ark files is different: " + std::to_string(numFrames) + " and " +
std::string errMessage("Number of frames in input files is different: " + std::to_string(numFrames) + " and " +
std::to_string(currentNumFrames));
throw std::logic_error(errMessage);
}
@@ -979,19 +864,28 @@ int main(int argc, char* argv[]) {
int i = 0;
for (auto& ptrInputBlob : ptrInputBlobs) {
if (ptrInputBlob->size() != numFrameElementsInput[i++] * batchSize) {
throw std::logic_error("network input size(" + std::to_string(ptrInputBlob->size()) + ") mismatch to ark file size (" +
throw std::logic_error("network input size(" + std::to_string(ptrInputBlob->size()) + ") mismatch to input file size (" +
std::to_string(numFrameElementsInput[i - 1] * batchSize) + ")");
}
}
ptrScores.resize(numFrames * numScoresPerFrame * sizeof(float));
if (!FLAGS_r.empty()) {
/** Read ark file with reference scores **/
/** Read file with reference scores **/
BaseFile* fileReferenceScores;
auto exReferenceScoresFile = fileExt(FLAGS_r);
if (exReferenceScoresFile == "ark") {
fileReferenceScores = &arkFile;
} else if (exReferenceScoresFile == "npz") {
fileReferenceScores = &numpyFile;
} else {
throw std::logic_error("Invalid Reference Scores file");
}
std::string refUtteranceName;
GetKaldiArkInfo(reference_name_files[next_output].c_str(), utteranceIndex, &n, &numBytesReferenceScoreThisUtterance);
fileReferenceScores->GetFileInfo(reference_name_files[next_output].c_str(), utteranceIndex, &n, &numBytesReferenceScoreThisUtterance);
ptrReferenceScores.resize(numBytesReferenceScoreThisUtterance);
LoadKaldiArkArray(reference_name_files[next_output].c_str(), utteranceIndex, refUtteranceName, ptrReferenceScores, &numFramesReference,
&numFrameElementsReference, &numBytesPerElementReference);
fileReferenceScores->LoadFile(reference_name_files[next_output].c_str(), utteranceIndex, refUtteranceName, ptrReferenceScores,
&numFramesReference, &numFrameElementsReference, &numBytesPerElementReference);
}
double totalTime = 0.0;
@@ -1009,7 +903,7 @@ int main(int argc, char* argv[]) {
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> callPerfMap;
size_t frameIndex = 0;
uint32_t numFramesArkFile = numFrames;
uint32_t numFramesFile = numFrames;
numFrames += FLAGS_cw_l + FLAGS_cw_r;
uint32_t numFramesThisBatch {batchSize};
@@ -1120,7 +1014,7 @@ int main(int argc, char* argv[]) {
}
/** Iterate over all the input blobs **/
for (size_t i = 0; i < numInputArkFiles; ++i) {
for (size_t i = 0; i < numInputFiles; ++i) {
MemoryBlob::Ptr minput = as<MemoryBlob>(ptrInputBlobs[i]);
if (!minput) {
std::string errMessage("We expect ptrInputBlobs[" + std::to_string(i) + "] to be inherited from MemoryBlob, " +
@@ -1141,14 +1035,14 @@ int main(int argc, char* argv[]) {
inferRequest.numFramesThisBatch = numFramesThisBatch;
frameIndex += numFramesThisBatch;
for (size_t j = 0; j < inputArkFiles.size(); j++) {
for (size_t j = 0; j < inputFiles.size(); j++) {
if (FLAGS_cw_l > 0 || FLAGS_cw_r > 0) {
int idx = frameIndex - FLAGS_cw_l;
if (idx > 0 && idx < static_cast<int>(numFramesArkFile)) {
if (idx > 0 && idx < static_cast<int>(numFramesFile)) {
inputFrame[j] += sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch;
} else if (idx >= static_cast<int>(numFramesArkFile)) {
} else if (idx >= static_cast<int>(numFramesFile)) {
inputFrame[j] =
&ptrUtterances[j].front() + (numFramesArkFile - 1) * sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch;
&ptrUtterances[j].front() + (numFramesFile - 1) * sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch;
} else if (idx <= 0) {
inputFrame[j] = &ptrUtterances[j].front();
}
@@ -1179,9 +1073,17 @@ int main(int argc, char* argv[]) {
// --------------------------- Step 8. Process output part 2 -------------------------------------------------------
if (!FLAGS_o.empty()) {
auto exOutputScoresFile = fileExt(FLAGS_o);
if (exOutputScoresFile == "ark") {
fileOutput = &arkFile;
} else if (exOutputScoresFile == "npz") {
fileOutput = &numpyFile;
} else {
throw std::logic_error("Invalid Reference Scores file");
}
/* Save output data to file */
bool shouldAppend = (utteranceIndex == 0) ? false : true;
SaveKaldiArkArray(output_name_files[next_output].c_str(), shouldAppend, uttName, &ptrScores.front(), numFramesArkFile, numScoresPerFrame);
fileOutput->SaveFile(output_name_files[next_output].c_str(), shouldAppend, uttName, &ptrScores.front(), numFramesFile, numScoresPerFrame);
}
/** Show performance results **/

View File

@@ -14,7 +14,7 @@
static const char help_message[] = "Print a usage message.";
/// @brief message for images argument
static const char input_message[] = "Required. Paths to .ark files. Example of usage: <file1.ark,file2.ark> or <file.ark>.";
static const char input_message[] = "Required. Paths to input files. Example of usage: <file1.ark,file2.ark> or <file.ark> or <file.npz>.";
/// @brief message for model argument
static const char model_message[] = "Required. Path to an .xml file with a trained model (required if -rg is missing).";
@@ -49,10 +49,10 @@ static const char custom_cpu_library_message[] = "Required for CPU plugin custom
"Absolute path to a shared library with the kernels implementations.";
/// @brief message for score output argument
static const char output_message[] = "Optional. Output file name to save ark scores.";
static const char output_message[] = "Optional. Output file name to save scores. Example of usage: <output.ark> or <output.npz>";
/// @brief message for reference score file argument
static const char reference_score_message[] = "Optional. Read reference score .ark file and compare scores.";
static const char reference_score_message[] = "Optional. Read reference score file and compare scores. Example of usage: <reference.ark> or <reference.npz>";
/// @brief message for read GNA model argument
static const char read_gna_model_message[] = "Read GNA model from file using path/filename provided (required if -m is missing).";

View File

@@ -425,6 +425,11 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
auto pass_config = manager.get_pass_config();
pass_config->set_callback<ngraph::pass::UnrollTensorIterator>(
[config](const std::shared_ptr<const ngraph::Node> &node) -> bool {
auto sub_graph_op = std::dynamic_pointer_cast<const ngraph::op::util::SubGraphOp>(node);
if (!sub_graph_op)
return false;  // not a sub-graph op; fall back to the default behaviour
int64_t num_iter = sub_graph_op->get_num_iterations();
if (num_iter == 1) {
return false;
}
return !config.enable_loop_unrolling;
});

View File

@@ -83,10 +83,11 @@ void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::MatMul>& o
for (auto o = transpose_order.size(); o < 4; o++)
transpose_order.push_back((uint16_t)o);
std::vector<uint16_t> cldnn_permute_order = ConvertPermuteOrder(transpose_order);
auto permuteName = op->get_friendly_name() + "/transpose_b";
auto permutePrim = cldnn::permute(permuteName,
weightsName,
transpose_order);
cldnn_permute_order);
p.AddPrimitive(permutePrim);
p.AddInnerPrimitiveToProfiler(permuteName, layerName, op);
weightsName = permuteName;
@@ -102,10 +103,11 @@ void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::MatMul>& o
for (auto o = transpose_order.size(); o < 4; o++)
transpose_order.push_back((uint16_t)o);
std::vector<uint16_t> cldnn_permute_order = ConvertPermuteOrder(transpose_order);
auto permuteName = op->get_friendly_name() + "/transpose_a";
auto permutePrim = cldnn::permute(permuteName,
inputName,
transpose_order);
cldnn_permute_order);
p.AddPrimitive(permutePrim);
p.AddInnerPrimitiveToProfiler(permuteName, layerName, op);
inputName = permuteName;

View File

@@ -18,6 +18,7 @@
#include "api/reduce.hpp"
#include "api/reorder.hpp"
#include "api/reshape.hpp"
namespace CLDNNPlugin {
@@ -78,6 +79,28 @@ void CreateReduceOp(Program& p, const std::shared_ptr<ngraph::Node>& op, cldnn::
p.AddPrimitive(reducePrim);
auto resultLayerName = layerName;
auto out_dims = op->get_output_shape(0).size();
if (out_dims == 3 && !keep_dims && rank >= 4) {
resultLayerName = layerName + "_reshape";
auto out_shape = op->get_output_shape(0);
cldnn::tensor outTensor;
switch (rank) {
case 6:
outTensor = cldnn::tensor(TensorValue(out_shape[0]), TensorValue(out_shape[1]),
1, TensorValue(out_shape[2]), 1, 1);
break;
case 5:
outTensor = cldnn::tensor(TensorValue(out_shape[0]), TensorValue(out_shape[1]),
1, TensorValue(out_shape[2]), 1);
break;
case 4:
outTensor = cldnn::tensor(TensorValue(out_shape[0]), TensorValue(out_shape[1]),
1, TensorValue(out_shape[2]));
break;
}
auto reshape_prim = cldnn::reshape(resultLayerName, layerName, outTensor);
p.AddPrimitive(reshape_prim);
p.AddPrimitiveToProfiler(op, resultLayerName);
}
auto reorderLayerName = layerName + "_reorder";
cldnn::format out_format = cldnn::format::any;
auto out_dt = DataTypeFromPrecision(op->get_output_element_type(0));
@@ -89,7 +112,7 @@ void CreateReduceOp(Program& p, const std::shared_ptr<ngraph::Node>& op, cldnn::
else if (rank - rawAxes.size() <= 4)
out_format = cldnn::format::bfyx;
auto reorder_prim = cldnn::reorder(reorderLayerName, layerName, out_format, out_dt);
auto reorder_prim = cldnn::reorder(reorderLayerName, resultLayerName, out_format, out_dt);
p.AddPrimitive(reorder_prim);
p.AddPrimitiveToProfiler(op, reorderLayerName);
} else {

View File

@@ -71,7 +71,7 @@ struct DnnActivation {
return type;
}
static DnnActivation fromType(DnnActivationType type) {
DnnActivation activation;
DnnActivation activation{};
activation.type = type;
activation.args = {};
return activation;

View File

@@ -26,7 +26,7 @@ class GNAFakeQuantizeLayer {
* @brief convert FQ layer directly to gna-pwl activation layer
*/
DnnActivation parseAsActivation() const {
DnnActivation fqActivation;
DnnActivation fqActivation{};
fqActivation.fqParams.levels = fqLayer->GetParamAsSizeT("levels");
auto inputShape = getShapeForRange(fqLayer, 1);

View File

@@ -2091,6 +2091,7 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() {
};
auto quantParams = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
IE_ASSERT(quantParams != nullptr);
// Find all output layers connected to FQ
auto nextLayers = CNNNetGetAllNextLayersSkipCertain(layer.get(), -1, donotSkip);
@@ -2304,7 +2305,7 @@ void TransposeWeightsFromNCHWToNHWCPass::run() {
}
}
// Find a convolution in next layers to rotate weights columns
if (!l->outData.empty() && !getInputTo(l->outData[0]).empty() && !l->outData.empty() && !getInputTo(l->outData[0]).empty()) {
if (!l->outData.empty() && !getInputTo(l->outData[0]).empty()) {
std::vector<TranspositionInfo> transpositionInfo;
auto nextLayer = getInputTo(l->outData[0]).begin()->second;
transpositionInfo = FindTranspositionInfoFromNextLayers(nextLayer);
@@ -2345,7 +2346,7 @@ void TransposeWeightsFromNCHWToNHWCPass::run() {
}
// Find a convolution in previous or next layers
auto transpositionInfo = FindTranspositionInfoFromPrevLayers(firstInput);
if (!FoundPartToTranspose(transpositionInfo)) {
if (!FoundPartToTranspose(transpositionInfo) && !l->outData.empty() && !getInputTo(l->outData[0]).empty()) {
transpositionInfo = FindTranspositionInfoFromNextLayers(getInputTo(l->outData[0]).begin()->second);
}
if (FoundPartToTranspose(transpositionInfo)) {

View File

@@ -219,7 +219,7 @@ export(TARGETS ${TARGET_NAME} NAMESPACE IE::
# Export for developer package
ie_developer_export_targets(${TARGET_NAME} ${TARGET_NAME}_plugin_api)
ie_developer_export_targets(${TARGET_NAME}_plugin_api)
# install TBB
@@ -281,8 +281,6 @@ install(EXPORT InferenceEngineTargets
DESTINATION ${IE_CPACK_IE_DIR}/share
COMPONENT core_dev)
include(CMakePackageConfigHelpers)
set(IE_NGRAPH_DIR "${CMAKE_BINARY_DIR}/ngraph")
set(IE_INCLUDE_DIR "${PUBLIC_HEADERS_DIR}")
set(IE_PARALLEL_CMAKE "${InferenceEngine_SOURCE_DIR}/cmake/ie_parallel.cmake")

View File

@@ -11,6 +11,16 @@ using namespace InferenceEngine;
void InferenceEngine::LowLatency(InferenceEngine::CNNNetwork &network) {
auto function = network.getFunction();
ngraph::pass::Manager manager;
NGRAPH_SUPPRESS_DEPRECATED_START
manager.register_pass<ngraph::pass::LowLatency>();
NGRAPH_SUPPRESS_DEPRECATED_END
manager.run_passes(function);
}
void InferenceEngine::lowLatency2(InferenceEngine::CNNNetwork &network,
bool use_const_initializer) {
auto function = network.getFunction();
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::LowLatency2>(use_const_initializer);
manager.run_passes(function);
}
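A minimal usage sketch for the new entry point, assuming a model already on disk (the path and the Core setup are illustrative):
InferenceEngine::Core core;
InferenceEngine::CNNNetwork network = core.ReadNetwork("model.xml");
// Apply the second LowLatency version; with use_const_initializer = true the created
// ReadValue states get constant zero initial values (cf. create_init_subgraph in the tests below)
InferenceEngine::lowLatency2(network, true);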

View File

@@ -32,6 +32,7 @@ public:
LowPrecisionTransformations() {}
LowPrecisionTransformations(
const std::map<std::string, LayerTransformationPtr>& branchSpecificTransformations,
const std::map<std::string, LayerTransformationPtr>& decompositionTransformations,
const std::map<std::string, LayerTransformationPtr>& transformations,
const std::map<std::string, std::vector<std::pair<std::string, LayerTransformationPtr>>>& cleanupTransformations,
const std::vector<StandaloneCleanup>& standaloneCleanupTransformations);

View File

@@ -24,6 +24,10 @@ void ConvertTransformation::registerMatcherIn(GraphRewrite &pass, Transformation
bool ConvertTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const {
std::shared_ptr<opset1::Convert> convert = as_type_ptr<opset1::Convert>(m.get_match_root());
if (!convert) {
return false;
}
if (!canBeTransformed(context, convert)) {
return false;
}

View File

@@ -76,10 +76,12 @@ namespace low_precision {
LowPrecisionTransformations::LowPrecisionTransformations(
const std::map<std::string, LayerTransformationPtr>& branchSpecificTransformations,
const std::map<std::string, LayerTransformationPtr>& decompositionTransformations,
const std::map<std::string, LayerTransformationPtr>& transformations,
const std::map<std::string, std::vector<std::pair<std::string, LayerTransformationPtr>>>& cleanupTransformations,
const std::vector<StandaloneCleanup>& standaloneCleanupTransformations) :
branchSpecificTransformations(branchSpecificTransformations),
decompositionTransformations(decompositionTransformations),
transformations(transformations),
cleanupTransformations(cleanupTransformations),
standaloneCleanupTransformations(standaloneCleanupTransformations) {}

View File

@@ -158,12 +158,11 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndBias(MKLDNNGraph &graph) {
};
auto isSutableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
if ((parentNode->isConstant() && !childNode->isConstant()) || childNode->getAlgorithm() != EltwiseAdd || !childNode->getFusedWith().empty() ||
childNode->getParentEdges().size() != 2)
if (childNode->getAlgorithm() != EltwiseAdd || !childNode->getFusedWith().empty() || childNode->getParentEdges().size() != 2)
return false;
auto biasNode = childNode->getParentEdgesAtPort(1)[0]->getParent();
if (biasNode->getChildEdges().size() != 1)
if (biasNode->getType() != Input || !biasNode->isConstant() || biasNode->getChildEdges().size() != 1)
return false;
auto convOutDims = parentNode->getChildEdgesAtPort(0)[0]->getDims().ToSizeVector();
@@ -265,7 +264,7 @@ void MKLDNNGraphOptimizer::FuseDeconvolutionAndSimpleOperation(MKLDNNGraph &grap
auto& graphNodes = graph.GetNodes();
auto isSuitableParentNode = [](MKLDNNNodePtr node) {
return node->getType() == Deconvolution && node->getChildEdges().size() == 1 && node->getFusedWith().empty();
return node->getType() == Deconvolution && node->getChildEdges().size() == 1;
};
auto parent = graphNodes.begin();
@@ -277,8 +276,7 @@ void MKLDNNGraphOptimizer::FuseDeconvolutionAndSimpleOperation(MKLDNNGraph &grap
}
auto childNode = parentNode->getChildEdgeAt(0)->getChild();
// at this moment deconvolution supports only depthwise as post op
if (!childNode->canBePerformedAsScaleShift(parentNode.get())) {
if (!parentNode->canFuse(childNode)) {
parent++;
continue;
}
@@ -302,6 +300,8 @@ void MKLDNNGraphOptimizer::FuseMultiplyAndAdd(MKLDNNGraph &graph) {
auto& graphNodes = graph.GetNodes();
auto isSutableSecondInput = [](MKLDNNNodePtr node, MKLDNNDims dataDims) {
if (node->getType() != Input || !node->isConstant())
return false;
auto secondInputDims = node->outDims[0];
if (secondInputDims.ndims() != dataDims.ndims() || secondInputDims.ndims() < 2)
return false;
@@ -326,8 +326,7 @@ void MKLDNNGraphOptimizer::FuseMultiplyAndAdd(MKLDNNGraph &graph) {
};
auto isSutableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
if ((parentNode->isConstant() && !childNode->isConstant()) || childNode->getAlgorithm() != EltwiseAdd || !childNode->getFusedWith().empty() ||
childNode->getParentEdges().size() != 2)
if (childNode->getAlgorithm() != EltwiseAdd || !childNode->getFusedWith().empty() || childNode->getParentEdges().size() != 2)
return false;
return isSutableSecondInput(childNode->getParentEdgesAtPort(1)[0]->getParent(), childNode->getParentEdgesAtPort(0)[0]->getDims());
@@ -1518,9 +1517,9 @@ void MKLDNNGraphOptimizer::FusePerformedAsScaleShiftAndFakeQuantize(MKLDNNGraph
auto& graphNodes = graph.GetNodes();
auto getConstPort = [](const MKLDNNNodePtr node) -> int {
if (node->getParentEdgeAt(0)->getParent()->isConstant() && node->getParentEdgeAt(0)->getParent()->getType() == Input) {
if (node->getParentEdgeAt(0)->getParent()->getType() == Input && node->getParentEdgeAt(0)->getParent()->isConstant()) {
return 0;
} else if (node->getParentEdgeAt(1)->getParent()->isConstant() && node->getParentEdgeAt(1)->getParent()->getType() == Input) {
} else if (node->getParentEdgeAt(1)->getParent()->getType() == Input && node->getParentEdgeAt(1)->getParent()->isConstant()) {
return 1;
} else {
return -1;

View File

@@ -1296,7 +1296,7 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const
fusingPort = i;
continue;
}
if (!node->isConstant() || node->getType() != Input) {
if (node->getType() != Input || !node->isConstant()) {
return false;
}
}

View File

@@ -590,8 +590,9 @@ public:
isInQuantizedGraph = flag;
}
protected:
bool canBePerformedAsScaleShift(const MKLDNNNode *parentNode = nullptr) const;
protected:
bool canFuseSimpleOperation(const MKLDNNNodePtr& node) const;
// TODO [mandrono]: place outside of the node API
void fillScalesAndShifts(const MKLDNNNode *parentNode, std::vector<float> &scales, std::vector<float> &shifts, const int align = -1);

View File

@@ -4,6 +4,7 @@
#include "mkldnn_deconv_node.h"
#include "mkldnn_eltwise_node.h"
#include "mkldnn_fake_quantize_node.h"
#include "mkldnn_input_node.h"
#include <mkldnn.hpp>
#include <string>
@@ -143,19 +144,23 @@ InferenceEngine::Blob::Ptr MKLDNNDeconvolutionNode::createWeiBlobAsIO(InferenceE
return internalBlob;
}
bool MKLDNNDeconvolutionNode::canBeExecutedInInt8() {
if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_common))
return false;
bool MKLDNNDeconvolutionNode::canBeExecutedInInt8() const {
// todo: [antonvor] added these checks to fix performance problems
if (kernel.size() == 3)
return false;
if (!withGroups && IC % 4 != 0 && OC % 4 != 0)
return false;
// todo: [antonvor] fusing is not supported yet for int8
if (!fusedWith.empty())
if (!withGroups && stride.back() > 3)
return false;
if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_common)) {
auto inDims = getChildEdgeAt(0)->getDims().ToSizeVector();
// heuristicConst = 2^26
// heuristicParam = IC^2 * SP
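// e.g. IC = 128 with a 64x64 spatial output gives 128^2 * 64 * 64 = 2^26, exactly the
// threshold; any larger channel count or spatial size disables int8 without avx512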
auto heuristicConst = 67108864;
auto heuristicParam = IC * IC;
for (int i = 2; i < inDims.size(); i++)
heuristicParam *= inDims[i];
if (heuristicParam > heuristicConst)
return false;
}
for (int i = 0; i < kernel.size(); i++) {
if (kernel[i] < stride[i])
@@ -163,7 +168,11 @@ bool MKLDNNDeconvolutionNode::canBeExecutedInInt8() {
}
// not supported in oneDNN
if (withGroups && !isDW && (IC % 16 != 0 || OC % 16 != 0))
int channelBlock = impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_common) ? 16
: impl::cpu::x64::mayiuse(impl::cpu::x64::avx2) ? 8 : 4;
if (withGroups && !isDW && (IC % channelBlock != 0 || OC % channelBlock != 0))
return false;
if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_common) && stride.back() > 3)
return false;
InferenceEngine::Precision inPrecision = getOriginalInputPrecisionAtPort(0);
@@ -178,6 +187,13 @@ bool MKLDNNDeconvolutionNode::canBeExecutedInInt8() {
return (inputDataType == dnnl_s8 || inputDataType == dnnl_u8) && weightsDataType == dnnl_s8;
}
bool MKLDNNDeconvolutionNode::canFuse(const MKLDNNNodePtr& node) const {
if (canBeExecutedInInt8())
return canFuseSimpleOperation(node);
return (fusedWith.empty() && node->canBePerformedAsScaleShift(this));
}
void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
if (!descs_fwd.empty() && !descs_bwd.empty())
return;
@@ -196,6 +212,9 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(outPrecision);
if (inputDataType == memory::data_type::bf16 || outputDataType == memory::data_type::bf16)
inputDataType = outputDataType = memory::data_type::bf16;
if (!fusedWith.empty()) {
outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0));
}
if (getParentEdges().size() != 2 && getParentEdges().size() != 3)
IE_THROW() << errorPrefix << " has incorrect number of input edges";
@@ -240,6 +259,11 @@ void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr) {
eltwiseNode->appendPostOps(ops);
continue;
}
auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get());
if (fakeQuantizeNode) {
fakeQuantizeNode->appendPostOps(ops);
continue;
}
IE_THROW() << "Fusing of " << NameFromType(node->getType()) << " operation to " << NameFromType(this->getType()) << " node is not implemented";
}

View File

@@ -37,6 +37,7 @@ public:
InferenceEngine::Precision getRuntimePrecision() const override;
static bool isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept;
bool canFuse(const MKLDNNNodePtr& node) const override;
private:
bool withGroups = false;
@@ -60,7 +61,7 @@ private:
std::string errorPrefix;
bool canBeExecutedInInt8();
bool canBeExecutedInInt8() const;
InferenceEngine::Blob::Ptr createWeiBlobAsIO(InferenceEngine::SizeVector dims);
};

View File

@@ -604,9 +604,13 @@ private:
bool MKLDNNMVNNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
const auto& inDataShapeSize = op->input_value(0).get_shape().size();
if (inDataShapeSize < 1 || inDataShapeSize > 5) {
errorMessage = "First input accepts ranks from 1 to 5. Actual: " + std::to_string(inDataShapeSize);
if (op->get_output_partial_shape(0).rank().is_dynamic()) {
errorMessage = "Unsupported dynamic input rank.";
return false;
}
const auto& inDataRank = op->get_output_partial_shape(0).rank().get_length();
if (inDataRank < 1 || inDataRank > 5) {
errorMessage = "First input accepts ranks from 1 to 5. Actual: " + std::to_string(inDataRank);
return false;
}
@@ -632,21 +636,20 @@ bool MKLDNNMVNNode::isSupportedOperation(const std::shared_ptr<const ngraph::Nod
// 4D: axes: [1,2,3], [2,3]
// 5D: axes: [1,2,3,4], [2,3,4]
auto axesVal = axesOp->cast_vector<int>();
auto& mvnShape = mvnOp->get_output_shape(0);
for (int& axe : axesVal)
axe = axe < 0 ? axe + mvnShape.size() : axe;
axe = axe < 0 ? axe + inDataRank : axe;
std::sort(axesVal.begin(), axesVal.end());
if (mvnShape.size() == 1) {
if (inDataRank == 1) {
if (axesVal.size() != 1 || axesVal[0] != 0) {
errorMessage = "Unsupported axes.";
return false;
}
} else {
if (mvnShape.size() > 5 || (mvnShape.size() != axesVal.size() + 1 && mvnShape.size() != axesVal.size() + 2)) {
if (inDataRank > 5 || (inDataRank != axesVal.size() + 1 && inDataRank != axesVal.size() + 2)) {
errorMessage = "Unsupported axes.";
return false;
}
int value = mvnShape.size() - 1;
int value = inDataRank - 1;
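// e.g. for a 4D input, axes {2, 3} pass (3 and 2 match the countdown from rank - 1),
// while axes {1, 3} fail because the axes must be the innermost, contiguous ones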
for (int i = axesVal.size() - 1; i >= 0; i--, value--) {
if (axesVal[i] != value) {
errorMessage = "Unsupported axes.";

View File

@@ -0,0 +1,33 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <vector>
#include <memory>
#include <transformations_visibility.hpp>
#include <ngraph/ngraph.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
#include "ngraph/pattern/matcher.hpp"
namespace ngraph {
namespace pass {
class TRANSFORMATIONS_API SplitSqueezeConcatFusion;
} // namespace pass
} // namespace ngraph
/**
* @ingroup ie_transformation_common_api
 * @brief SplitSqueezeConcatFusion transformation replaces a group of
 * operations Split -> Squeeze (multiple) -> Concat with a Transpose -> Reshape pair.
*/
class ngraph::pass::SplitSqueezeConcatFusion : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
SplitSqueezeConcatFusion();
};
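A minimal sketch of a subgraph this matcher targets (the shapes and the opset are illustrative): splitting a {3, 2, 4} tensor along axis 0, squeezing that axis away, and concatenating along axis 1 is equivalent to Transpose {1, 0, 2} followed by Reshape to {2, 12}, which is the replacement the pass builds.
auto input = std::make_shared<ngraph::opset7::Parameter>(ngraph::element::f32, ngraph::Shape{3, 2, 4});
auto split_axis = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{}, {0});
auto split = std::make_shared<ngraph::opset7::Split>(input, split_axis, 3);
auto squeeze_axis = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {0});
ngraph::OutputVector squeezed;
for (size_t i = 0; i < 3; ++i)
    squeezed.push_back(std::make_shared<ngraph::opset7::Squeeze>(split->output(i), squeeze_axis));
auto concat = std::make_shared<ngraph::opset7::Concat>(squeezed, 1);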

View File

@@ -17,23 +17,13 @@ namespace ngraph {
namespace pass {
class TRANSFORMATIONS_API TransposeSinking;
class TRANSFORMATIONS_API TransposeOptimization;
class TRANSFORMATIONS_API TransposeReduction;
class TRANSFORMATIONS_API TransposeFQReduction;
class TRANSFORMATIONS_API TransposeFuse;
} // namespace pass
} // namespace ngraph
/**
* @ingroup ie_transformation_common_api
* @brief TransposeOptimization transformation replaces suitable Transposes with Reshape operation or optimises them out
*/
class ngraph::pass::TransposeOptimization : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
TransposeOptimization();
};
/**
* @ingroup ie_transformation_common_api
* @brief TransposeReduction transformation sinks Transpose through Reduce operations
@@ -54,6 +44,17 @@ public:
TransposeFQReduction();
};
/**
* @ingroup ie_transformation_common_api
 * @brief TransposeFuse transformation eliminates two consecutive Transposes if their combined order leaves the input unchanged,
 * or fuses them into a single Transpose otherwise
*/
class ngraph::pass::TransposeFuse : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
TransposeFuse();
};
/**
* @ingroup ie_transformation_common_api
* @brief TransposeSinking transformation sinks Transposes through known operations
@@ -64,6 +65,6 @@ public:
TransposeSinking() {
add_matcher<ngraph::pass::TransposeFQReduction>();
add_matcher<ngraph::pass::TransposeReduction>();
add_matcher<ngraph::pass::TransposeOptimization>();
add_matcher<ngraph::pass::TransposeFuse>();
}
};

View File

@@ -0,0 +1,32 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <vector>
#include <memory>
#include <transformations_visibility.hpp>
#include <ngraph/ngraph.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
#include "ngraph/pattern/matcher.hpp"
namespace ngraph {
namespace pass {
class TRANSFORMATIONS_API TransposeToReshape;
} // namespace pass
} // namespace ngraph
/**
* @ingroup ie_transformation_common_api
* @brief TransposeToReshape transformation replaces suitable Transposes with Reshape operation or optimizes them out
*/
class ngraph::pass::TransposeToReshape : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
TransposeToReshape();
};

View File

@@ -93,7 +93,7 @@ static bool simplify_gather_shapeof(shared_ptr<Node> node) {
auto zero_axis = opset3::Constant::create<int64_t>(element::i64, Shape{}, {0});
NodeVector new_ops;
auto new_shapeof = make_shared<opset3::ShapeOf>(gather->input_value(0));
auto new_shapeof = make_shared<opset3::ShapeOf>(gather->input_value(0), node->get_output_element_type(0));
new_ops.push_back(new_shapeof);
std::shared_ptr<Node> replace_op;
if (indices_rank.get_length() == 0) {
@@ -113,7 +113,7 @@ static bool simplify_gather_shapeof(shared_ptr<Node> node) {
new_ops.push_back(gather);
concat_inputs.push_back(gather);
}
auto shapeof_indices = make_shared<opset3::ShapeOf>(gather->input_value(1));
auto shapeof_indices = make_shared<opset3::ShapeOf>(gather->input_value(1), node->get_output_element_type(0));
new_ops.push_back(shapeof_indices);
concat_inputs.push_back(shapeof_indices);
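The change relies on opset3::ShapeOf taking an optional output element type, so the rewritten subgraph keeps the element type of the ShapeOf it replaces instead of silently switching to i64. A minimal sketch (the input is illustrative):
auto data = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, ngraph::Shape{2, 3, 4});
auto shape_i32 = std::make_shared<ngraph::opset3::ShapeOf>(data, ngraph::element::i32); // 1-D output of type i32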

View File

@@ -41,6 +41,8 @@
#include "transformations/common_optimizations/batch_to_space_fusion.hpp"
#include "transformations/common_optimizations/dilated_convolution_converter.hpp"
#include "transformations/common_optimizations/transpose_sinking.hpp"
#include "transformations/common_optimizations/split_squeeze_concat_fusion.hpp"
#include "transformations/common_optimizations/transpose_to_reshape.hpp"
#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp"
#include "transformations/op_conversions/convert_pad_to_group_conv.hpp"
#include "transformations/op_conversions/convert_divide.hpp"
@@ -91,7 +93,13 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptr<ngraph::
manager.register_pass<ngraph::pass::ConstantFolding>();
manager.register_pass<ngraph::pass::StridedSliceOptimization>(); // depends on CF
manager.register_pass<ngraph::pass::BroadcastElementwiseFusion>();
manager.register_pass<ngraph::pass::TransposeSinking>();
auto transpose_sinking = manager.register_pass<ngraph::pass::GraphRewrite>();
transpose_sinking->add_matcher<ngraph::pass::TransposeSinking>();
// SplitSqueezeConcatFusion should work in the same GraphRewrite as TransposeSinking,
// because it replaces a pattern that may contain Transposes which must be optimized before
// the transformation, and it also inserts a Transpose that can be optimized by TransposeSinking
transpose_sinking->add_matcher<ngraph::pass::SplitSqueezeConcatFusion>();
auto eliminations = manager.register_pass<ngraph::pass::GraphRewrite>();
eliminations->add_matcher<ngraph::pass::EliminateUnsqueezeGather>();
@@ -119,6 +127,7 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptr<ngraph::
common_fusions->add_matcher<ngraph::pass::BatchToSpaceFusion>();
common_fusions->add_matcher<ngraph::pass::DilatedConvolutionConverter>();
common_fusions->add_matcher<ngraph::pass::GeluFusion>();
common_fusions->add_matcher<ngraph::pass::TransposeToReshape>();
common_fusions->set_name("ngraph::pass::CommonFusions");
manager.register_pass<ngraph::pass::ConvertPadToGroupConvolution, false>();

View File

@@ -0,0 +1,95 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "itt.hpp"
#include "transformations/common_optimizations/split_squeeze_concat_fusion.hpp"
#include <memory>
#include <vector>
#include <numeric>
#include <ngraph/opsets/opset7.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
NGRAPH_RTTI_DEFINITION(ngraph::pass::SplitSqueezeConcatFusion, "SplitSqueezeConcatFusion", 0);
ngraph::pass::SplitSqueezeConcatFusion::SplitSqueezeConcatFusion() {
MATCHER_SCOPE(SplitSqueezeConcatFusion);
// Match only the Concat, because we don't know in advance how many inputs will go into it
auto concat_pattern = ngraph::pattern::wrap_type<ngraph::opset7::Concat>();
ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) {
const auto& pattern_to_output = m.get_pattern_value_map();
auto concat = std::dynamic_pointer_cast<ngraph::opset7::Concat>(pattern_to_output.at(concat_pattern).get_node_shared_ptr());
if (!concat) return false;
NodeVector nodes_to_delete{ concat };
int64_t axis_value = 0;
std::shared_ptr<ngraph::opset7::Split> split;
const auto& concat_inputs = concat->input_values();
if (concat_inputs.empty()) return false;
for (size_t i = 0; i < concat_inputs.size(); i++) {
auto squeeze = std::dynamic_pointer_cast<ngraph::opset7::Squeeze>(concat_inputs[i].get_node_shared_ptr());
if (!squeeze) return false;
nodes_to_delete.push_back(squeeze);
auto split_to_check = std::dynamic_pointer_cast<ngraph::opset7::Split>(squeeze->input_value(0).get_node_shared_ptr());
auto squeeze_axes = std::dynamic_pointer_cast<ngraph::opset7::Constant>(squeeze->input_value(1).get_node_shared_ptr());
if (!squeeze_axes || !split_to_check) return false;
auto squeeze_axes_vec = squeeze_axes->cast_vector<int64_t>();
if (squeeze_axes_vec.size() != 1) return false;
if (i == 0) {
axis_value = squeeze_axes_vec[0];
nodes_to_delete.push_back(split_to_check);
split = split_to_check;
} else if (axis_value != squeeze_axes_vec[0] || split_to_check != split) {
return false;
}
auto split_output = squeeze->input_value(0);
if (split_output.get_target_inputs().size() != 1 ||
split_output.get_index() != i)
return false;
}
if (split->get_num_splits() != concat_inputs.size()) return false;
auto split_axis = std::dynamic_pointer_cast<ngraph::opset7::Constant>(split->input_value(1).get_node_shared_ptr());
if (!split_axis) return false;
auto axis_vec = split_axis->cast_vector<int64_t>();
if (axis_vec.size() != 1 || axis_value != axis_vec[0])
return false;
auto input = split->input_value(0);
auto concat_axis = concat->get_axis();
auto rank = input.get_partial_shape().rank();
if (!rank.is_static())
return false;
std::vector<int64_t> order(rank.get_length());
std::iota(order.begin(), order.end(), 0);
order.erase(order.begin() + axis_value);
order.insert(order.begin() + concat_axis, axis_value);
auto transpose_order = ngraph::opset7::Constant::create(element::i64, { (size_t)rank.get_length() }, order);
auto transpose = register_new_node<ngraph::opset7::Transpose>(input, transpose_order);
auto shape_after = ngraph::opset7::Constant::create(element::i64, { (size_t)rank.get_length() - 1 }, concat->get_output_shape(0));
auto reshape = std::make_shared<ngraph::opset7::Reshape>(transpose, shape_after, false);
reshape->set_friendly_name(m.get_match_root()->get_friendly_name());
ngraph::copy_runtime_info(nodes_to_delete, { transpose, reshape });
ngraph::replace_node(m.get_match_root(), reshape);
return true;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(concat_pattern, matcher_name);
register_matcher(m, callback);
}
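Concretely, for a rank-4 input split along axis 0 and concatenated along axis 2, order starts as {0, 1, 2, 3}; erasing the split axis leaves {1, 2, 3}, and re-inserting 0 at the concat axis gives the transpose order {1, 2, 0, 3}. The trailing Reshape then flattens the transposed result into the original Concat output shape.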

View File

@@ -10,14 +10,15 @@
#include <vector>
#include <ngraph/opsets/opset6.hpp>
#include <ngraph/opsets/opset7.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <numeric>
NGRAPH_RTTI_DEFINITION(ngraph::pass::TransposeSinking, "TransposeSinking", 0);
NGRAPH_RTTI_DEFINITION(ngraph::pass::TransposeOptimization, "TransposeOptimization", 0);
NGRAPH_RTTI_DEFINITION(ngraph::pass::TransposeReduction, "TransposeReduction", 0);
NGRAPH_RTTI_DEFINITION(ngraph::pass::TransposeFQReduction, "TransposeFQReduction", 0);
NGRAPH_RTTI_DEFINITION(ngraph::pass::TransposeFuse, "TransposeFuse", 0);
using namespace ngraph;
@@ -55,103 +56,6 @@ std::shared_ptr<ngraph::opset6::Constant> get_reversed_order_constant(const std:
ngraph::element::i64, ngraph::Shape{reverse_order.size()}, reverse_order);
}
bool replace_transpose_with_reshape(const std::shared_ptr<Node>& transpose) {
auto data = transpose->input_value(0);
const auto input_shape = transpose->input(0).get_partial_shape();
const size_t input_shape_rank = input_shape.rank().get_length();
auto order = as_type_ptr<opset6::Constant>(transpose->input_value(1).get_node_shared_ptr());
if (!order || !ngraph::shape_size(order->get_shape())) {
return false;
}
const auto order_value = order->cast_vector<int64_t>();
// Check that the transpose order, ignoring dimensions equal to 1, is ascending
int64_t last_dim(-1);
for (size_t i = 0; i < input_shape_rank; ++i) {
if (input_shape[order_value[i]].is_dynamic() || input_shape[order_value[i]] != 1) {
if (order_value[i] < last_dim) {
return false;
}
last_dim = order_value[i];
}
}
// The Transpose operation can be removed if the original transpose order is sorted
// or if every dimension that changes its place is equal to 1
using DimensionToPosition = struct {
Dimension dim;
size_t pos;
};
std::vector<DimensionToPosition> dims;
for (size_t i = 0; i < input_shape_rank; ++i) {
if (order_value[i] != static_cast<int64_t>(i)) {
dims.push_back({input_shape[order_value[i]], i});
}
}
// If none of the moved dimensions differs from 1, we can remove this Transpose
if (count_if(dims.begin(), dims.end(), [](const DimensionToPosition& item) {
return !(item.dim.is_static() && item.dim.get_length() == 1);
}) == 0) {
return replace_output_update_name(transpose->output(0), transpose->input_value(0));
}
// Transpose can be replaced with Reshape in two ways:
// 1. Reshape with dims as Constant
// 2. Reshape with dims as input (ShapeOf->Gather)
//
// The first case is possible only if at most one dynamic dimension changes its position
// For example: input_shape {?, 3, 1, ?} and order {0, 1, 3, 2} can be replaced with Reshape
// with Constant {0, 3, -1, 1}, but if input_shape is {?, 1, 1, ?} and order is {1, 0, 3, 2} the transpose
// cannot be replaced in the same way; in this case it is only possible to use Gather(ShapeOf,
// order)
Output<Node> reshape_dim;
NodeVector new_ops;
if (count_if(dims.begin(), dims.end(), [](const DimensionToPosition& item) {
return item.dim.is_dynamic();
}) < 2) {
std::vector<int64_t> reshape_value(input_shape_rank, 0);
for (const auto& item : dims) {
reshape_value[item.pos] = item.dim.is_dynamic() ? -1 : item.dim.get_length();
}
reshape_dim =
opset3::Constant::create(element::i64, Shape{reshape_value.size()}, reshape_value);
} else {
auto shape_of = std::make_shared<opset3::ShapeOf>(data);
new_ops.push_back(shape_of);
reshape_dim = std::make_shared<opset3::Gather>(
shape_of, order, opset3::Constant::create(element::i64, Shape{1}, {0}));
new_ops.push_back(reshape_dim.get_node_shared_ptr());
}
auto reshape_op = std::make_shared<opset3::Reshape>(data, reshape_dim, true);
new_ops.push_back(reshape_op);
reshape_op->set_friendly_name(transpose->get_friendly_name());
copy_runtime_info(transpose, new_ops);
replace_node(transpose, reshape_op);
return true;
}
ngraph::pass::TransposeOptimization::TransposeOptimization() {
MATCHER_SCOPE(TransposeOptimization);
auto transpose_label = pattern::wrap_type<opset6::Transpose>(
{pattern::any_input(pattern::has_static_rank()), pattern::wrap_type<opset6::Constant>()});
ngraph::matcher_pass_callback matcher_pass_callback = [=](ngraph::pattern::Matcher &m) {
return replace_transpose_with_reshape(m.get_match_root());
};
auto m = std::make_shared<ngraph::pattern::Matcher>(transpose_label, matcher_name);
register_matcher(m, matcher_pass_callback);
}
ngraph::pass::TransposeReduction::TransposeReduction() {
MATCHER_SCOPE(TransposeReduction);
@@ -271,3 +175,50 @@ ngraph::pass::TransposeFQReduction::TransposeFQReduction() {
auto m = std::make_shared<ngraph::pattern::Matcher>(reduce_or_squeeze_label, matcher_name);
register_matcher(m, matcher_pass_callback);
}
ngraph::pass::TransposeFuse::TransposeFuse() {
MATCHER_SCOPE(TransposeFuse);
auto transpose_1 = pattern::wrap_type<opset7::Transpose>({ pattern::any_input(), pattern::wrap_type<opset7::Constant>() }, pattern::consumers_count(1));
auto transpose_2 = pattern::wrap_type<opset7::Transpose>({ transpose_1, pattern::wrap_type<opset7::Constant>() });
ngraph::matcher_pass_callback matcher_pass_callback = [=](ngraph::pattern::Matcher& m) {
const auto& pattern_to_output = m.get_pattern_value_map();
auto transpose1 = pattern_to_output.at(transpose_1).get_node_shared_ptr();
auto transpose2 = pattern_to_output.at(transpose_2).get_node_shared_ptr();
auto input = transpose1->input_value(0);
auto transpose1_order = std::dynamic_pointer_cast<ngraph::opset7::Constant>(transpose1->get_input_node_shared_ptr(1));
auto transpose2_order = std::dynamic_pointer_cast<ngraph::opset7::Constant>(transpose2->get_input_node_shared_ptr(1));
if (!transpose1_order || !transpose2_order)
return false;
auto order1 = transpose1_order->cast_vector<int64_t>();
auto order2 = transpose2_order->cast_vector<int64_t>();
if (order1.size() != order2.size())
return false;
bool is_ordered = true;
for (size_t i = 0; i < order1.size(); i++) {
order2[i] = order1[order2[i]];
if (order2[i] != (int64_t)i)
is_ordered = false;
}
if (is_ordered) {
return ngraph::replace_output_update_name(transpose2->output(0), input);
} else {
auto new_order = ngraph::opset7::Constant::create(element::i64, {order2.size()}, order2);
auto new_transpose = register_new_node<ngraph::opset7::Transpose>(input, new_order);
ngraph::copy_runtime_info({ transpose1, transpose2 }, new_transpose);
ngraph::replace_node(transpose2, new_transpose);
}
return true;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(transpose_2, matcher_name);
register_matcher(m, matcher_pass_callback);
}
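For example, composing order1 = {0, 2, 1} with order2 = {2, 1, 0} gives order2[i] = order1[order2[i]] = {1, 2, 0}, so the pair is replaced by a single Transpose with that order; had the two orders composed to the identity {0, 1, 2}, both Transposes would be removed instead.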

View File

@@ -0,0 +1,115 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "itt.hpp"
#include "transformations/common_optimizations/transpose_to_reshape.hpp"
#include "transformations/utils/utils.hpp"
#include <memory>
#include <vector>
#include <ngraph/opsets/opset6.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <numeric>
NGRAPH_RTTI_DEFINITION(ngraph::pass::TransposeToReshape, "TransposeToReshape", 0);
using namespace ngraph;
bool replace_transpose_with_reshape(const std::shared_ptr<Node>& transpose) {
auto data = transpose->input_value(0);
const auto input_shape = transpose->input(0).get_partial_shape();
const size_t input_shape_rank = input_shape.rank().get_length();
auto order = as_type_ptr<opset6::Constant>(transpose->input_value(1).get_node_shared_ptr());
if (!order || !ngraph::shape_size(order->get_shape())) {
return false;
}
const auto order_value = order->cast_vector<int64_t>();
// Check that the transpose order, ignoring dimensions equal to 1, is ascending
int64_t last_dim(-1);
for (size_t i = 0; i < input_shape_rank; ++i) {
if (input_shape[order_value[i]].is_dynamic() || input_shape[order_value[i]] != 1) {
if (order_value[i] < last_dim) {
return false;
}
last_dim = order_value[i];
}
}
// The Transpose operation can be removed if the original transpose order is sorted
// or if every dimension that changes its place is equal to 1
using DimensionToPosition = struct {
Dimension dim;
size_t pos;
};
std::vector<DimensionToPosition> dims;
for (size_t i = 0; i < input_shape_rank; ++i) {
if (order_value[i] != static_cast<int64_t>(i)) {
dims.push_back({ input_shape[order_value[i]], i });
}
}
// If none of the moved dimensions differs from 1, we can remove this Transpose
if (count_if(dims.begin(), dims.end(), [](const DimensionToPosition& item) {
return !(item.dim.is_static() && item.dim.get_length() == 1);
}) == 0) {
return replace_output_update_name(transpose->output(0), transpose->input_value(0));
}
// Transpose can be replaced with Reshape in two ways:
// 1. Reshape with dims as Constant
// 2. Reshape with dims as input (ShapeOf->Gather)
//
// The first case is possible only if at most one dynamic dimension changes its position
// For example: input_shape {?, 3, 1, ?} and order {0, 1, 3, 2} can be replaced with Reshape
// with Constant {0, 3, -1, 1}, but if input_shape is {?, 1, 1, ?} and order is {1, 0, 3, 2} the transpose
// cannot be replaced in the same way; in this case it is only possible to use Gather(ShapeOf,
// order)
Output<Node> reshape_dim;
NodeVector new_ops;
if (count_if(dims.begin(), dims.end(), [](const DimensionToPosition& item) {
return item.dim.is_dynamic();
}) < 2) {
std::vector<int64_t> reshape_value(input_shape_rank, 0);
for (const auto& item : dims) {
reshape_value[item.pos] = item.dim.is_dynamic() ? -1 : item.dim.get_length();
}
reshape_dim =
opset3::Constant::create(element::i64, Shape{ reshape_value.size() }, reshape_value);
} else {
auto shape_of = std::make_shared<opset3::ShapeOf>(data);
new_ops.push_back(shape_of);
reshape_dim = std::make_shared<opset3::Gather>(
shape_of, order, opset3::Constant::create(element::i64, Shape{ 1 }, { 0 }));
new_ops.push_back(reshape_dim.get_node_shared_ptr());
}
auto reshape_op = std::make_shared<opset3::Reshape>(data, reshape_dim, true);
new_ops.push_back(reshape_op);
reshape_op->set_friendly_name(transpose->get_friendly_name());
copy_runtime_info(transpose, new_ops);
replace_node(transpose, reshape_op);
return true;
}
ngraph::pass::TransposeToReshape::TransposeToReshape() {
MATCHER_SCOPE(TransposeToReshape);
auto transpose_label = pattern::wrap_type<opset6::Transpose>(
{ pattern::any_input(pattern::has_static_rank()), pattern::wrap_type<opset6::Constant>() });
ngraph::matcher_pass_callback matcher_pass_callback = [=](ngraph::pattern::Matcher& m) {
return replace_transpose_with_reshape(m.get_match_root());
};
auto m = std::make_shared<ngraph::pattern::Matcher>(transpose_label, matcher_name);
register_matcher(m, matcher_pass_callback);
}
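A minimal sketch of running the extracted pass, mirroring the updated test usage further below (f is an illustrative std::shared_ptr<ngraph::Function>):
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::Validate>();
manager.register_pass<ngraph::pass::TransposeToReshape>();
manager.run_passes(f);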

View File

@@ -26,7 +26,7 @@ ngraph::pass::ConvertDivide::ConvertDivide() {
}
auto pow = std::make_shared<ngraph::opset1::Power>(div->input(1).get_source_output(),
op::Constant::create(div->get_input_element_type(1), Shape{1}, {-1}));
op::Constant::create(div->get_input_element_type(1), Shape{}, {-1}));
auto mul = std::make_shared<ngraph::opset1::Multiply>(div->input(0).get_source_output(), pow);
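A Shape{1} exponent constant broadcasts the Power result to rank 1, so dividing a scalar (0-d) tensor used to yield a rank-1 output; with a true scalar Shape{} constant the output rank is preserved, which the new ConvertDivideScalar test below pins down. The same Shape{1} to Shape{} change is applied to ConvertMinimum, ConvertNegative, and ConvertSubtract in the following files.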

View File

@@ -30,14 +30,14 @@ ngraph::pass::ConvertMinimum::ConvertMinimum() {
*/
auto neg_0 = std::make_shared<ngraph::opset1::Multiply>(minimum->input(0).get_source_output(),
opset1::Constant::create(minimum->get_input_element_type(0), Shape{1}, {-1}));
opset1::Constant::create(minimum->get_input_element_type(0), Shape{}, {-1}));
auto neg_1 = std::make_shared<ngraph::opset1::Multiply>(minimum->input(1).get_source_output(),
opset1::Constant::create(minimum->get_input_element_type(1), Shape{1}, {-1}));
opset1::Constant::create(minimum->get_input_element_type(1), Shape{}, {-1}));
auto max = std::make_shared<ngraph::opset1::Maximum>(neg_0, neg_1);
auto neg_2 = std::make_shared<ngraph::opset1::Multiply>(max, opset1::Constant::create(max->get_element_type(), Shape{1}, {-1}));
auto neg_2 = std::make_shared<ngraph::opset1::Multiply>(max, opset1::Constant::create(max->get_element_type(), Shape{}, {-1}));
neg_2->set_friendly_name(minimum->get_friendly_name());
ngraph::copy_runtime_info(minimum, {neg_0, neg_1, max, neg_2});

View File

@@ -25,7 +25,7 @@ ngraph::pass::ConvertNegative::ConvertNegative() {
}
auto mul = std::make_shared<ngraph::opset1::Multiply>(neg->input(0).get_source_output(),
opset1::Constant::create(neg->get_element_type(), Shape{1}, {-1}));
opset1::Constant::create(neg->get_element_type(), Shape{}, {-1}));
mul->set_friendly_name(neg->get_friendly_name());
ngraph::copy_runtime_info(neg, mul);
ngraph::replace_node(neg, mul);

View File

@@ -61,7 +61,7 @@ ngraph::pass::ConvertSubtract::ConvertSubtract() {
}
auto neg = std::make_shared<ngraph::opset1::Multiply>(sub->input(1).get_source_output(),
opset1::Constant::create(sub->get_input_element_type(1), Shape{1}, {-1}));
opset1::Constant::create(sub->get_input_element_type(1), Shape{}, {-1}));
auto add = std::make_shared<ngraph::opset1::Add>(sub->input(0).get_source_output(), neg);

View File

@@ -80,7 +80,7 @@ class ConstantWriter {
public:
using FilePosition = int64_t;
using HashValue = size_t;
using ConstWritePositions = std::unordered_map<HashValue, FilePosition>;
using ConstWritePositions = std::unordered_map<HashValue, std::pair<FilePosition, void const *>>;
ConstantWriter(std::ostream& bin_data, bool enable_compression = true)
: m_binary_output(bin_data)
@@ -93,18 +93,19 @@ public:
m_binary_output.write(ptr, size);
return offset;
}
// The biggest supported models have at most 1-2 thousand constant nodes, and
// with a 64-bit hash that gives a probability of around 1 in 10 trillion that a
// hash collision will appear. Because of this, a choice has been made to
// not perform collision detection and keep the hashing quick and seamless.
// This hash is weak (but efficient) and should be replaced with a more
// stable hash algorithm, since the current one gives
// the same hash for the {2, 2} and {0, 128} arrays. So we have to compare
// values when finding a match in the hash map.
const HashValue hash = hash_combine(ptr, size);
const auto found = m_hash_to_file_positions.find(hash);
if (found != end(m_hash_to_file_positions)) {
return found->second;
if (found != end(m_hash_to_file_positions) &&
memcmp(static_cast<void const*>(ptr), found->second.second, size) == 0) {
return found->second.first;
}
m_binary_output.write(ptr, size);
m_hash_to_file_positions.insert({hash, offset});
m_hash_to_file_positions.insert({hash, {offset, static_cast<void const *>(ptr)}});
return offset;
}

View File

@@ -117,6 +117,25 @@ TEST_F(SerializatioConstantCompressionTest, IdenticalConstantsFP32) {
ASSERT_TRUE(file_size(bin_1) == unique_const_count * ngraph::shape_size(shape) * sizeof(float));
}
TEST_F(SerializatioConstantCompressionTest, NonIdenticalConstantsI64) {
constexpr int unique_const_count = 2;
const ngraph::Shape shape{2};
// hash_combine returns the same hash for these two constants, so we also check the contents of the arrays
auto A = ngraph::op::Constant::create(ngraph::element::i64, shape, {2, 2});
auto B = ngraph::op::Constant::create(ngraph::element::i64, shape, {0, 128});
auto ngraph_a = std::make_shared<ngraph::Function>(ngraph::NodeVector{A, B},
ngraph::ParameterVector{});
ngraph::pass::Serialize(m_out_xml_path_1, m_out_bin_path_1).run_on_function(ngraph_a);
std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);
ASSERT_TRUE(file_size(bin_1) == unique_const_count * ngraph::shape_size(shape) * sizeof(int64_t));
}
TEST_F(SerializatioConstantCompressionTest, IdenticalConstantsTimesTwo) {
constexpr int unique_const_count = 2;
const ngraph::Shape shape{2, 2, 2};

View File

@@ -130,7 +130,6 @@ TEST_F(NGraphReaderTests, ReadHSigmoidNetwork) {
<layer name="Multiply_744" type="Const" precision="FP32" id="4">
<output>
<port id="0" precision="FP32">
<dim>1</dim>
</port>
</output>
<blobs>
@@ -147,7 +146,6 @@ TEST_F(NGraphReaderTests, ReadHSigmoidNetwork) {
<dim>22</dim>
</port>
<port id="1">
<dim>1</dim>
</port>
</input>
<output>

View File

@@ -19,7 +19,7 @@
#include <transformations/common_optimizations/algebraic_simplification.hpp>
#include <transformations/utils/utils.hpp>
#include <transformations/init_node_info.hpp>
#include <transformations/common_optimizations/transpose_sinking.hpp>
#include <transformations/common_optimizations/transpose_to_reshape.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
@@ -312,7 +312,7 @@ TEST(algebraic_simplification, replace_transpose_with_reshape) {
pass::Manager pass_manager;
pass_manager.register_pass<pass::Validate>();
pass_manager.register_pass<pass::TransposeSinking>();
pass_manager.register_pass<pass::TransposeToReshape>();
pass_manager.run_passes(optimized_f);
auto ps = baseline_f->get_results()[0]->get_output_partial_shape(0);

View File

@@ -39,7 +39,7 @@ TEST(TransformationTests, ConvertDivide) {
auto data = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{3, 1, 2});
auto divide_constant = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {1.5});
auto pow = std::make_shared<ngraph::opset1::Power>(divide_constant,
ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {-1}));
ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{}, {-1}));
auto mul = std::make_shared<ngraph::opset1::Multiply>(data, pow);
f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{mul}, ngraph::ParameterVector{data});
@@ -75,4 +75,38 @@ TEST(TransformationTests, ConvertDivideNegative) {
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
}
TEST(TransformationTests, ConvertDivideScalar) {
std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
{
auto data = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{});
auto divide_constant = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{}, {1.5});
auto divide = std::make_shared<ngraph::opset1::Divide>(data, divide_constant);
f = std::make_shared<ngraph::Function>(ngraph::NodeVector{divide}, ngraph::ParameterVector{data});
NGRAPH_CHECK(divide->get_output_partial_shape(0).rank().get_length() == 0);
ngraph::pass::Manager m;
m.register_pass<ngraph::pass::InitNodeInfo>();
m.register_pass<ngraph::pass::ConvertDivide>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto data = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{});
auto divide_constant = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{}, {1.5});
auto pow = std::make_shared<ngraph::opset1::Power>(divide_constant,
ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{}, {-1}));
auto mul = std::make_shared<ngraph::opset1::Multiply>(data, pow);
f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{mul}, ngraph::ParameterVector{data});
NGRAPH_CHECK(mul->get_output_partial_shape(0).rank().get_length() == 0);
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}

View File

@@ -119,6 +119,29 @@ TEST(TransformationTests, ConvertPrecision_ShapeOf) {
ASSERT_FALSE(has_type<ngraph::element::Type_t::f16>(f));
}
TEST(TransformationTests, ConvertPrecision_ConstantRelu) {
std::shared_ptr<Function> f(nullptr);
{
auto input = opset4::Constant::create(element::f16, Shape{1, 1000, 4}, {0});
auto relu1 = std::make_shared<opset4::Relu>(input);
auto relu2 = std::make_shared<opset4::Relu>(relu1);
f = std::make_shared<Function>(NodeVector{relu2}, ParameterVector{});
pass::Manager manager;
static const precisions_array precisions = {
{ ngraph::element::f16, ngraph::element::f32 }
};
manager.register_pass<ngraph::pass::ConvertPrecision>(precisions);
manager.run_passes(f);
}
ASSERT_FALSE(has_type<ngraph::element::Type_t::i64>(f));
ASSERT_FALSE(has_type<ngraph::element::Type_t::f16>(f));
}
TEST(TransformationTests, ConvertPrecision_Convert) {
std::shared_ptr<Function> f(nullptr);
{

View File

@@ -68,7 +68,9 @@ TEST(TransformationTests, LowLatencyLSTM) {
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
NGRAPH_SUPPRESS_DEPRECATED_START
manager.register_pass<ngraph::pass::LowLatency>();
NGRAPH_SUPPRESS_DEPRECATED_END
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
manager.run_passes(f);
}
@@ -149,7 +151,9 @@ TEST(TransformationTests, LowLatencyGRU) {
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
NGRAPH_SUPPRESS_DEPRECATED_START
manager.register_pass<ngraph::pass::LowLatency>();
NGRAPH_SUPPRESS_DEPRECATED_END
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
manager.run_passes(f);
@@ -227,7 +231,9 @@ TEST(TransformationTests, LowLatencyRNN) {
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
NGRAPH_SUPPRESS_DEPRECATED_START
manager.register_pass<ngraph::pass::LowLatency>();
NGRAPH_SUPPRESS_DEPRECATED_END
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
manager.run_passes(f);
@@ -317,7 +323,9 @@ TEST(TransformationTests, LowLatencyLSTMReshape) {
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
NGRAPH_SUPPRESS_DEPRECATED_START
manager.register_pass<ngraph::pass::LowLatency>();
NGRAPH_SUPPRESS_DEPRECATED_END
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
manager.run_passes(f);
}
@@ -413,7 +421,9 @@ TEST(TransformationTests, LowLatencyLSTM_Loop) {
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
NGRAPH_SUPPRESS_DEPRECATED_START
manager.register_pass<ngraph::pass::LowLatency>();
NGRAPH_SUPPRESS_DEPRECATED_END
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
manager.run_passes(f);
}

View File

@@ -0,0 +1,829 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <string>
#include <memory>
#include <queue>
#include <ngraph/function.hpp>
#include <ngraph/opsets/opset7.hpp>
#include <ngraph/pass/manager.hpp>
#include <transformations/control_flow/unroll_tensor_iterator.hpp>
#include <transformations/init_node_info.hpp>
#include <transformations/common_optimizations/low_latency.hpp>
#include <transformations/serialize.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
using namespace testing;
using namespace ngraph;
using namespace opset7;
using namespace std;
Output<Node> create_init_subgraph(const Output<Node>& in_node) {
auto const_zero = make_shared<Constant>(in_node.get_element_type(), Shape{1}, 0);
auto shape_of = make_shared<ShapeOf>(in_node);
auto broadcast = make_shared<Broadcast>(const_zero, shape_of);
return broadcast->output(0);
}
Output<Node> insert_identity(const Output<Node>& in_node) {
auto axis_1 = Constant::create(element::i64, Shape{1}, {1});
auto identity_1 = std::make_shared<Unsqueeze>(in_node, axis_1);
return std::make_shared<Squeeze>(identity_1, axis_1);
}
}
std::shared_ptr<Function> createLSTMBody(const std::shared_ptr<Parameter>& Xi,
const std::shared_ptr<Parameter>& H_t,
const std::shared_ptr<Parameter>& C_t,
bool is_loop = false) {
// Body
auto axis = Constant::create(element::i64, Shape{}, {0});
auto squeeze = std::make_shared<Squeeze>(Xi, axis);
auto w_val = std::vector<float>(512 * 16, 0);
auto r_val = std::vector<float>(512 * 128, 0);
auto b_val = std::vector<float>(512, 0);
auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
auto B = Constant::create(element::f32, Shape{512}, b_val);
auto lstm_cell = std::make_shared<LSTMCell>(squeeze, H_t, C_t, W, R, B, 128);
auto res_1 = std::make_shared<Result>(lstm_cell->output(0));
auto unsqueeze = std::make_shared<Unsqueeze>(lstm_cell->output(0), axis);
auto res_2 = std::make_shared<Result>(unsqueeze);
auto res_3 = std::make_shared<Result>(lstm_cell->output(1));
auto func = std::make_shared<Function>(OutputVector{res_1, res_2, res_3},
ParameterVector{Xi, H_t, C_t});
if (is_loop) {
auto body_condition = std::make_shared<Constant>(
element::boolean, Shape{1}, true);
auto cond_res = std::make_shared<Result>(body_condition);
func->add_results({cond_res});
}
return func;
}
TEST(TransformationTests, LowLatency2_LSTM) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
auto X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
// Body
auto body = createLSTMBody(Xi, H_t, C_t);
auto results = body->get_results();
auto tensor_iterator = std::make_shared<TensorIterator>();
tensor_iterator->set_body(body);
tensor_iterator->set_friendly_name("LSTMTensorIterator");
tensor_iterator->set_merged_input(C_t, C_init, results[2]);
tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
tensor_iterator->set_merged_input(H_t, H_init, results[0]);
tensor_iterator->get_iter_value(results[0], -1);
tensor_iterator->get_concatenated_slices(results[1], 0, 1, 1, -1, 0);
auto res_ti_1 = std::make_shared<Result>(tensor_iterator->output(1));
auto res_ti_2 = std::make_shared<Result>(tensor_iterator->output(0));
f = std::make_shared<Function>(NodeVector{res_ti_1, res_ti_2},
ParameterVector{X, H_init, C_init});
pass::Manager manager;
manager.register_pass<pass::InitNodeInfo>();
manager.register_pass<pass::LowLatency2>();
manager.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
const std::string variable_name_H("LSTMTensorIterator/variable0");
const std::string variable_name_C("LSTMTensorIterator/variable1");
auto variable_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H});
auto variable_C = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C});
auto read_value_H = std::make_shared<ReadValue>(create_init_subgraph(H_t), variable_H);
auto read_value_C = std::make_shared<ReadValue>(create_init_subgraph(C_t), variable_C);
// Body
auto axis = Constant::create(element::i64, Shape{}, {0});
auto squeeze = std::make_shared<Squeeze>(Xi, axis);
auto w_val = std::vector<float>(512 * 16, 0);
auto r_val = std::vector<float>(512 * 128, 0);
auto b_val = std::vector<float>(512, 0);
auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
auto B = Constant::create(element::f32, Shape{512}, b_val);
auto lstm_cell = std::make_shared<LSTMCell>(squeeze, read_value_H, read_value_C, W, R, B, 128);
auto assign_H = std::make_shared<Assign>(lstm_cell->output(0), variable_H);
auto assign_C = std::make_shared<Assign>(lstm_cell->output(1), variable_C);
auto unsqueeze = std::make_shared<Unsqueeze>(lstm_cell->output(0), axis);
auto res_2 = std::make_shared<Result>(insert_identity(unsqueeze));
auto res_1 = std::make_shared<Result>(insert_identity(lstm_cell->output(0)));
f_ref = std::make_shared<Function>(OutputVector{res_1, res_2}, ParameterVector{Xi, H_t, C_t});
f_ref->add_sinks({assign_C, assign_H});
assign_H->add_control_dependency(read_value_H);
assign_C->add_control_dependency(read_value_C);
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
TEST(TransformationTests, LowLatency2_GRU) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
auto X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto Y = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto Yi = std::make_shared<Parameter>(element::f32, Shape{1, 128});
// Body
auto axis = Constant::create(element::i64, Shape{}, {0});
auto squeeze = std::make_shared<Squeeze>(Xi, axis);
auto w_val = std::vector<float>(384 * 16, 0);
auto r_val = std::vector<float>(384 * 128, 0);
auto b_val = std::vector<float>(384, 0);
auto W = Constant::create(element::f32, Shape{384, 16}, w_val);
auto R = Constant::create(element::f32, Shape{384, 128}, r_val);
auto B = Constant::create(element::f32, Shape{384}, b_val);
auto gru_cell = std::make_shared<GRUCell>(squeeze, Yi, W, R, B, 128);
auto res_1 = std::make_shared<Result>(gru_cell);
auto unsqueeze = std::make_shared<Unsqueeze>(gru_cell, axis);
auto res_2 = std::make_shared<Result>(unsqueeze);
auto body = std::make_shared<Function>(OutputVector{res_1, res_2}, ParameterVector{Xi, Yi});
auto tensor_iterator = std::make_shared<TensorIterator>();
tensor_iterator->set_body(body);
tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
tensor_iterator->set_merged_input(Yi, Y, res_1);
auto out0 = tensor_iterator->get_iter_value(res_1, -1);
auto out1 = tensor_iterator->get_concatenated_slices(res_2, 0, 1, 1, -1, 0);
auto res_ti_1 = std::make_shared<Result>(tensor_iterator->output(1));
f = std::make_shared<Function>(NodeVector{res_ti_1}, ParameterVector{X, Y});
pass::Manager manager;
manager.register_pass<pass::InitNodeInfo>();
manager.register_pass<pass::LowLatency2>();
manager.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
const std::string variable_name_H("GRUTensorIterator/variable0");
auto variable_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H});
auto read_value_H = std::make_shared<ReadValue>(create_init_subgraph(H_t), variable_H);
// Body
auto axis = Constant::create(element::i64, Shape{}, {0});
auto squeeze = std::make_shared<Squeeze>(Xi, axis);
auto w_val = std::vector<float>(384 * 16, 0);
auto r_val = std::vector<float>(384 * 128, 0);
auto b_val = std::vector<float>(384, 0);
auto W = Constant::create(element::f32, Shape{384, 16}, w_val);
auto R = Constant::create(element::f32, Shape{384, 128}, r_val);
auto B = Constant::create(element::f32, Shape{384}, b_val);
auto rnn_cell = std::make_shared<GRUCell>(squeeze, read_value_H, W, R, B, 128);
auto assign_H = std::make_shared<Assign>(rnn_cell->output(0), variable_H);
auto res_1 = std::make_shared<Result>(assign_H);
auto unsqueeze = std::make_shared<Unsqueeze>(rnn_cell->output(0), axis);
auto res_2 = std::make_shared<Result>(insert_identity(unsqueeze));
f_ref = std::make_shared<Function>(ResultVector {res_2}, ParameterVector{Xi, H_t});
f_ref->add_sinks({assign_H});
assign_H->add_control_dependency(read_value_H);
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
TEST(TransformationTests, LowLatency2_RNN) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
auto X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto Y = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto Yi = std::make_shared<Parameter>(element::f32, Shape{1, 128});
// Body
auto axis = Constant::create(element::i64, Shape{}, {0});
auto squeeze = std::make_shared<Squeeze>(Xi, axis);
auto w_val = std::vector<float>(128 * 16, 0);
auto r_val = std::vector<float>(128 * 128, 0);
auto b_val = std::vector<float>(128, 0);
auto W = Constant::create(element::f32, Shape{128, 16}, w_val);
auto R = Constant::create(element::f32, Shape{128, 128}, r_val);
auto B = Constant::create(element::f32, Shape{128}, b_val);
auto rnn_cell = std::make_shared<RNNCell>(squeeze, Yi, W, R, B, 128);
auto res_1 = std::make_shared<Result>(rnn_cell);
auto unsqueeze = std::make_shared<Unsqueeze>(rnn_cell, axis);
auto res_2 = std::make_shared<Result>(unsqueeze);
auto body = std::make_shared<Function>(OutputVector{res_1, res_2}, ParameterVector{Xi,
Yi});
auto tensor_iterator = std::make_shared<TensorIterator>();
tensor_iterator->set_body(body);
tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
tensor_iterator->set_merged_input(Yi, Y, res_1);
auto out0 = tensor_iterator->get_iter_value(res_1, -1);
auto out1 = tensor_iterator->get_concatenated_slices(res_2, 0, 1, 1, -1, 0);
auto res_ti_1 = std::make_shared<Result>(tensor_iterator->output(1));
f = std::make_shared<Function>(NodeVector{res_ti_1}, ParameterVector{X, Y});
pass::Manager manager;
manager.register_pass<pass::InitNodeInfo>();
manager.register_pass<pass::LowLatency2>();
manager.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
const std::string variable_name_H("RNNTensorIterator/variable0");
auto variable_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H});
auto read_value_H = std::make_shared<ReadValue>(create_init_subgraph(H_t), variable_H);
// Body
auto axis = Constant::create(element::i64, Shape{}, {0});
auto squeeze = std::make_shared<Squeeze>(Xi, axis);
auto w_val = std::vector<float>(128 * 16, 0);
auto r_val = std::vector<float>(128 * 128, 0);
auto b_val = std::vector<float>(128, 0);
auto W = Constant::create(element::f32, Shape{128, 16}, w_val);
auto R = Constant::create(element::f32, Shape{128, 128}, r_val);
auto B = Constant::create(element::f32, Shape{128}, b_val);
auto rnn_cell = std::make_shared<RNNCell>(squeeze, read_value_H, W, R, B, 128);
auto assign_H = std::make_shared<Assign>(rnn_cell->output(0), variable_H);
auto res_1 = std::make_shared<Result>(assign_H);
auto unsqueeze = std::make_shared<Unsqueeze>(rnn_cell->output(0), axis);
auto res_2 = std::make_shared<Result>(insert_identity(unsqueeze));
f_ref = std::make_shared<Function>(ResultVector{res_2}, ParameterVector{Xi, H_t});
f_ref->add_sinks({assign_H});
assign_H->add_control_dependency(read_value_H);
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
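
// The TI is first built for two iterations (X: {2, 1, 16}), then reshaped to a single
// iteration; LowLatency2 can then unroll it into a bare LSTMCell with H/C state variables.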
TEST(TransformationTests, LowLatency2_LSTMReshape) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
auto X = std::make_shared<Parameter>(element::f32, Shape{2, 1, 16});
auto H = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
// Body
auto body = createLSTMBody(Xi, H_t, C_t);
auto results = body->get_results();
auto tensor_iterator = std::make_shared<TensorIterator>();
tensor_iterator->set_body(body);
tensor_iterator->set_merged_input(C_t, C, results[2]);
tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
tensor_iterator->set_merged_input(H_t, H, results[0]);
auto out0 = tensor_iterator->get_iter_value(results[0], -1);
auto out1 = tensor_iterator->get_concatenated_slices(results[1], 0, 1, 1, -1, 0);
auto res_ti_1 = std::make_shared<Result>(tensor_iterator->output(1));
auto res_ti_2 = std::make_shared<Result>(tensor_iterator->output(0));
f = std::make_shared<Function>(NodeVector{res_ti_1, res_ti_2}, ParameterVector{X, H, C});
// Reshape
// change the number of iteration of TI. 2 -> 1
auto new_X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
f->replace_parameter(0, new_X);
f->validate_nodes_and_infer_types();
pass::Manager manager;
manager.register_pass<pass::InitNodeInfo>();
manager.register_pass<pass::LowLatency2>();
manager.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
const std::string variable_name_H("LSTMTensorIterator/variable0");
const std::string variable_name_C("LSTMTensorIterator/variable1");
auto variable_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H});
auto variable_C = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C});
auto read_value_H = std::make_shared<ReadValue>(create_init_subgraph(H_t), variable_H);
auto read_value_C = std::make_shared<ReadValue>(create_init_subgraph(C_t), variable_C);
// Body
auto axis = Constant::create(element::i64, Shape{}, {0});
auto squeeze = std::make_shared<Squeeze>(Xi, axis);
auto w_val = std::vector<float>(512 * 16, 0);
auto r_val = std::vector<float>(512 * 128, 0);
auto b_val = std::vector<float>(512, 0);
auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
auto B = Constant::create(element::f32, Shape{512}, b_val);
auto lstm_cell = std::make_shared<LSTMCell>(squeeze, read_value_H, read_value_C, W, R, B, 128);
auto assign_H = std::make_shared<Assign>(lstm_cell->output(0), variable_H);
auto assign_C = std::make_shared<Assign>(lstm_cell->output(1), variable_C);
auto unsqueeze = std::make_shared<Unsqueeze>(lstm_cell->output(0), axis);
auto res_2 = std::make_shared<Result>(insert_identity(unsqueeze));
auto res_1 = std::make_shared<Result>(insert_identity(lstm_cell->output(0)));
f_ref = std::make_shared<Function>(OutputVector{res_1, res_2}, ParameterVector{Xi, H_t, C_t});
f_ref->add_sinks({assign_C, assign_H});
assign_H->add_control_dependency(read_value_H);
assign_C->add_control_dependency(read_value_C);
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
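
// Same scenario as above, but with a single-iteration Loop (trip_count = 1) instead of
// a TensorIterator.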
TEST(TransformationTests, LowLatency2_LSTM_Loop) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
auto X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
// Body
auto body = createLSTMBody(Xi, H_t, C_t, true);
auto results = body->get_results();
auto trip_count =
std::make_shared<Constant>(element::i64, Shape{}, 1);
auto exec_condition =
std::make_shared<Constant>(element::boolean, Shape{}, true);
auto loop = std::make_shared<Loop>(trip_count, exec_condition);
loop->set_special_body_ports({-1, 3});
loop->set_function(body);
loop->set_friendly_name("LSTMLoop");
loop->set_merged_input(C_t, C_init, results[2]);
loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
loop->set_merged_input(H_t, H_init, results[0]);
auto out0 = loop->get_iter_value(results[0], -1);
auto out1 = loop->get_concatenated_slices(results[1], 0, 1, 1, -1, 0);
auto res_ti_1 = std::make_shared<Result>(loop->output(1));
auto res_ti_2 = std::make_shared<Result>(loop->output(0));
f = std::make_shared<Function>(NodeVector{res_ti_1, res_ti_2},
ParameterVector{X, H_init, C_init});
pass::Manager manager;
manager.register_pass<pass::InitNodeInfo>();
manager.register_pass<pass::LowLatency2>();
manager.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
const std::string variable_name_H("LSTMTensorIterator/variable0");
const std::string variable_name_C("LSTMTensorIterator/variable1");
auto variable_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H});
auto variable_C = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C});
auto read_value_H = std::make_shared<ReadValue>(create_init_subgraph(H_t), variable_H);
auto read_value_C = std::make_shared<ReadValue>(create_init_subgraph(C_t), variable_C);
// Body
auto axis = Constant::create(element::i64, Shape{}, {0});
auto squeeze = std::make_shared<Squeeze>(Xi, axis);
auto w_val = std::vector<float>(512 * 16, 0);
auto r_val = std::vector<float>(512 * 128, 0);
auto b_val = std::vector<float>(512, 0);
auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
auto B = Constant::create(element::f32, Shape{512}, b_val);
auto lstm_cell = std::make_shared<LSTMCell>(squeeze, read_value_H, read_value_C, W, R, B, 128);
auto assign_H = std::make_shared<Assign>(lstm_cell->output(0), variable_H);
auto assign_C = std::make_shared<Assign>(lstm_cell->output(1), variable_C);
auto unsqueeze = std::make_shared<Unsqueeze>(lstm_cell->output(0), axis);
auto res_2 = std::make_shared<Result>(insert_identity(unsqueeze));
auto res_1 = std::make_shared<Result>(insert_identity(lstm_cell->output(0)));
f_ref = std::make_shared<Function>(OutputVector{res_1, res_2}, ParameterVector{Xi, H_t, C_t});
f_ref->add_sinks({assign_C, assign_H});
assign_H->add_control_dependency(read_value_H);
assign_C->add_control_dependency(read_value_C);
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
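
// With ITER_CNT > 1 the TensorIterator cannot be unrolled; LowLatency2 keeps it and
// attaches ReadValue/Assign to the merged H/C inputs and the corresponding iter values.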
TEST(TransformationTests, LowLatency2_LSTM_several_iterations) {
constexpr int ITER_CNT = 5;
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
auto X = std::make_shared<Parameter>(element::f32, Shape{ITER_CNT, 1, 16});
auto H = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
// Body
auto body = createLSTMBody(Xi, H_t, C_t);
auto results = body->get_results();
auto tensor_iterator = std::make_shared<TensorIterator>();
tensor_iterator->set_body(body);
tensor_iterator->set_merged_input(C_t, C, results[2]);
tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
tensor_iterator->set_merged_input(H_t, H, results[0]);
auto out0 = tensor_iterator->get_iter_value(results[0], -1);
auto out1 = tensor_iterator->get_concatenated_slices(results[1], 0, 1, 1, -1, 0);
auto res_ti_1 = std::make_shared<Result>(tensor_iterator->output(1));
auto res_ti_2 = std::make_shared<Result>(tensor_iterator->output(0));
f = std::make_shared<Function>(NodeVector{res_ti_1, res_ti_2}, ParameterVector{X, H, C});
pass::Manager manager;
manager.register_pass<pass::InitNodeInfo>();
manager.register_pass<pass::LowLatency2>();
manager.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
// Reference function: the TensorIterator is not unrolled; the state is carried by
// ReadValue/Assign wired around it.
{
auto X = std::make_shared<Parameter>(element::f32, Shape{ITER_CNT, 1, 16});
auto H = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C = std::make_shared<Parameter>(element::f32, Shape{1, 128});
const std::string variable_name_H("LSTMTensorIterator/variable0");
const std::string variable_name_C("LSTMTensorIterator/variable1");
auto variable_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H});
auto variable_C = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C});
auto read_value_H = std::make_shared<ReadValue>(create_init_subgraph(H), variable_H);
auto read_value_C = std::make_shared<ReadValue>(create_init_subgraph(C), variable_C);
// Body parameters
auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
// Body
auto axis = Constant::create(element::i64, Shape{}, {0});
auto squeeze = std::make_shared<Squeeze>(Xi, axis);
auto w_val = std::vector<float>(512 * 16, 0);
auto r_val = std::vector<float>(512 * 128, 0);
auto b_val = std::vector<float>(512, 0);
auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
auto B = Constant::create(element::f32, Shape{512}, b_val);
auto lstm_cell = std::make_shared<LSTMCell>(squeeze, H_t, C_t, W, R, B, 128);
auto res_1 = std::make_shared<Result>(lstm_cell->output(0));
auto unsqueeze = std::make_shared<Unsqueeze>(lstm_cell, axis);
auto res_2 = std::make_shared<Result>(unsqueeze);
auto res_3 = std::make_shared<Result>(lstm_cell->output(1));
auto body = std::make_shared<Function>(OutputVector{res_1, res_2, res_3},
ParameterVector{Xi, H_t, C_t});
auto tensor_iterator = std::make_shared<TensorIterator>();
tensor_iterator->set_body(body);
tensor_iterator->set_merged_input(C_t, read_value_C, res_3);
tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
tensor_iterator->set_merged_input(H_t, read_value_H, res_1);
auto out0 = tensor_iterator->get_iter_value(res_1, -1);
auto out1 = tensor_iterator->get_concatenated_slices(res_2, 0, 1, 1, -1, 0);
auto out2 = tensor_iterator->get_iter_value(res_3, -1);
auto assign_H = std::make_shared<Assign>(out0, variable_H);
auto assign_C = std::make_shared<Assign>(out2, variable_C);
auto outer_res_2 = std::make_shared<Result>(out1);
auto outer_res_1 = std::make_shared<Result>(out0);
f_ref = std::make_shared<Function>(OutputVector{outer_res_1, outer_res_2}, ParameterVector{X, H, C});
f_ref->add_sinks({assign_C, assign_H});
assign_H->add_control_dependency(read_value_H);
assign_C->add_control_dependency(read_value_C);
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
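
// Loop variant with a data-dependent trip count (ShapeOf/Gather over X); reshaping X
// from 10 to 1 iteration lets LowLatency2 unroll the Loop into a bare LSTMCell.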
TEST(TransformationTests, LowLatency2_LSTM_Loop_Reshape) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
auto X = std::make_shared<Parameter>(element::f32, Shape{10, 1, 16});
auto H_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
// Body
auto body = createLSTMBody(Xi, H_t, C_t, true);
auto results = body->get_results();
auto shape_of = std::make_shared<ShapeOf>(X);
const auto trip_count = std::make_shared<Gather>(shape_of, Constant::create(ngraph::element::i64, {1}, {0}),
Constant::create(ngraph::element::i64, {1}, {0}));
auto exec_condition =
std::make_shared<Constant>(element::boolean, Shape{}, true);
auto loop = std::make_shared<Loop>(trip_count, exec_condition);
loop->set_special_body_ports({-1, 3});
loop->set_function(body);
loop->set_friendly_name("LSTMLoop");
loop->set_merged_input(C_t, C_init, results[2]);
loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
loop->set_merged_input(H_t, H_init, results[0]);
auto out0 = loop->get_iter_value(results[0], -1);
auto out1 = loop->get_concatenated_slices(results[1], 0, 1, 1, -1, 0);
auto res_ti_1 = std::make_shared<Result>(loop->output(1));
auto res_ti_2 = std::make_shared<Result>(loop->output(0));
f = std::make_shared<Function>(NodeVector{res_ti_1, res_ti_2},
ParameterVector{X, H_init, C_init});
// Reshape
// change the number of iteration of Loop. 10 -> 1
auto new_X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
f->replace_parameter(0, new_X);
f->validate_nodes_and_infer_types();
pass::Manager manager;
manager.register_pass<pass::InitNodeInfo>();
manager.register_pass<pass::LowLatency2>();
manager.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
const std::string variable_name_H("LSTMTensorIterator/variable0");
const std::string variable_name_C("LSTMTensorIterator/variable1");
auto variable_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H});
auto variable_C = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C});
auto read_value_H = std::make_shared<ReadValue>(create_init_subgraph(H_t), variable_H);
auto read_value_C = std::make_shared<ReadValue>(create_init_subgraph(C_t), variable_C);
// Body
auto axis = Constant::create(element::i64, Shape{}, {0});
auto squeeze = std::make_shared<Squeeze>(Xi, axis);
auto w_val = std::vector<float>(512 * 16, 0);
auto r_val = std::vector<float>(512 * 128, 0);
auto b_val = std::vector<float>(512, 0);
auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
auto B = Constant::create(element::f32, Shape{512}, b_val);
auto lstm_cell = std::make_shared<LSTMCell>(squeeze, read_value_H, read_value_C, W, R, B, 128);
auto assign_H = std::make_shared<Assign>(lstm_cell->output(0), variable_H);
auto assign_C = std::make_shared<Assign>(lstm_cell->output(1), variable_C);
auto unsqueeze = std::make_shared<Unsqueeze>(lstm_cell->output(0), axis);
auto res_2 = std::make_shared<Result>(insert_identity(unsqueeze));
auto res_1 = std::make_shared<Result>(insert_identity(lstm_cell->output(0)));
f_ref = std::make_shared<Function>(OutputVector{res_1, res_2}, ParameterVector{Xi, H_t, C_t});
f_ref->add_sinks({assign_C, assign_H});
assign_H->add_control_dependency(read_value_H);
assign_C->add_control_dependency(read_value_C);
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
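
// Multi-iteration Loop (trip_count = 10): the Loop is preserved and the state is wired
// through ReadValue/Assign outside of it. The `true` argument to LowLatency2 is assumed
// to be use_const_initializer, selecting a constant initial value for the state.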
TEST(TransformationTests, LowLatency2_LSTM_Loop_several_iterations) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
auto X = std::make_shared<Parameter>(element::f32, Shape{10, 1, 16});
auto H_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
// Body
auto body = createLSTMBody(Xi, H_t, C_t, true);
auto results = body->get_results();
auto trip_count =
std::make_shared<Constant>(element::i64, Shape{}, 10);
auto exec_condition =
std::make_shared<Constant>(element::boolean, Shape{}, true);
auto loop = std::make_shared<Loop>(trip_count, exec_condition);
loop->set_special_body_ports({-1, 3});
loop->set_function(body);
loop->set_friendly_name("LSTMLoop");
loop->set_merged_input(C_t, C_init, results[2]);
loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
loop->set_merged_input(H_t, H_init, results[0]);
auto out0 = loop->get_iter_value(results[0], -1);
auto out1 = loop->get_concatenated_slices(results[1], 0, 1, 1, -1, 0);
auto res_ti_1 = std::make_shared<Result>(loop->output(1));
auto res_ti_2 = std::make_shared<Result>(loop->output(0));
f = std::make_shared<Function>(NodeVector{res_ti_1, res_ti_2},
ParameterVector{X, H_init, C_init});
pass::Manager manager;
manager.register_pass<pass::InitNodeInfo>();
manager.register_pass<pass::LowLatency2>(true);
manager.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto X = std::make_shared<Parameter>(element::f32, Shape{10, 1, 16});
auto H = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C = std::make_shared<Parameter>(element::f32, Shape{1, 128});
const std::string variable_name_H("LSTMTensorIterator/variable0");
const std::string variable_name_C("LSTMTensorIterator/variable1");
auto variable_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H});
auto variable_C = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C});
auto read_value_H = std::make_shared<ReadValue>(create_init_subgraph(H), variable_H);
auto read_value_C = std::make_shared<ReadValue>(create_init_subgraph(C), variable_C);
// Body parameters
auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
// Body
auto axis = Constant::create(element::i64, Shape{}, {0});
auto squeeze = std::make_shared<Squeeze>(Xi, axis);
auto w_val = std::vector<float>(512 * 16, 0);
auto r_val = std::vector<float>(512 * 128, 0);
auto b_val = std::vector<float>(512, 0);
auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
auto B = Constant::create(element::f32, Shape{512}, b_val);
auto lstm_cell = std::make_shared<LSTMCell>(squeeze, H_t, C_t, W, R, B, 128);
auto res_1 = std::make_shared<Result>(lstm_cell->output(0));
auto unsqueeze = std::make_shared<Unsqueeze>(lstm_cell->output(0), axis);
auto res_2 = std::make_shared<Result>(unsqueeze);
auto res_3 = std::make_shared<Result>(lstm_cell->output(1));
auto body_condition = std::make_shared<Constant>(
element::boolean, Shape{1}, true);
auto body = std::make_shared<Function>(OutputVector{res_1, res_2, res_3, body_condition},
ParameterVector{Xi, H_t, C_t});
auto trip_count =
std::make_shared<Constant>(element::i64, Shape{}, 10);
auto exec_condition =
std::make_shared<Constant>(element::boolean, Shape{}, true);
auto loop = std::make_shared<Loop>(trip_count, exec_condition);
loop->set_special_body_ports({-1, 3});
loop->set_function(body);
loop->set_friendly_name("LSTMLoop");
loop->set_merged_input(C_t, read_value_C, res_3);
loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
loop->set_merged_input(H_t, read_value_H, res_1);
auto out0 = loop->get_iter_value(res_1, -1);
auto out1 = loop->get_concatenated_slices(res_2, 0, 1, 1, -1, 0);
auto out3 = loop->get_iter_value(res_3, -1);
auto assign_H = std::make_shared<Assign>(out0, variable_H);
auto assign_C = std::make_shared<Assign>(out3, variable_C);
auto outer_res_2 = std::make_shared<Result>(out1);
auto outer_res_1 = std::make_shared<Result>(out0);
f_ref = std::make_shared<Function>(OutputVector{outer_res_1, outer_res_2}, ParameterVector{X, H, C});
f_ref->add_sinks({assign_C, assign_H});
assign_H->add_control_dependency(read_value_H);
assign_C->add_control_dependency(read_value_C);
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
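
// Compatibility check: applying the deprecated LowLatency (v1) pass and then
// LowLatency2 on the same network must run without throwing.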
TEST(TransformationTests, LowLatencyLSTM_LLTv1_LLTv2) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
auto X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
// Body
auto axis = Constant::create(element::i64, Shape{}, {0});
auto squeeze = std::make_shared<Squeeze>(Xi, axis);
auto w_val = std::vector<float>(512 * 16, 0);
auto r_val = std::vector<float>(512 * 128, 0);
auto b_val = std::vector<float>(512, 0);
auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
auto B = Constant::create(element::f32, Shape{512}, b_val);
auto lstm_cell = std::make_shared<LSTMCell>(squeeze, H_t, C_t, W, R, B, 128);
auto res_1 = std::make_shared<Result>(lstm_cell->output(0));
auto unsqueeze = std::make_shared<Unsqueeze>(lstm_cell->output(0), axis);
auto res_2 = std::make_shared<Result>(unsqueeze);
auto res_3 = std::make_shared<Result>(lstm_cell->output(1));
auto body = std::make_shared<Function>(OutputVector{res_1, res_2, res_3}, ParameterVector{Xi, H_t, C_t});
auto tensor_iterator = std::make_shared<TensorIterator>();
tensor_iterator->set_body(body);
tensor_iterator->set_friendly_name("LSTMTensorIterator");
tensor_iterator->set_merged_input(C_t, C_init, res_3);
tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
tensor_iterator->set_merged_input(H_t, H_init, res_1);
auto out0 = tensor_iterator->get_iter_value(res_1, -1);
auto out1 = tensor_iterator->get_concatenated_slices(res_2, 0, 1, 1, -1, 0);
auto res_ti_1 = std::make_shared<Result>(tensor_iterator->output(1));
auto res_ti_2 = std::make_shared<Result>(tensor_iterator->output(0));
f = std::make_shared<Function>(NodeVector{res_ti_1, res_ti_2},
ParameterVector{X, H_init, C_init});
auto f_2 = ngraph::clone_function(*f);
pass::Manager manager_2;
manager_2.register_pass<pass::InitNodeInfo>();
NGRAPH_SUPPRESS_DEPRECATED_START
manager_2.register_pass<ngraph::pass::LowLatency>();
NGRAPH_SUPPRESS_DEPRECATED_END
EXPECT_NO_THROW(manager_2.run_passes(f_2));
pass::Manager manager;
manager.register_pass<pass::InitNodeInfo>();
NGRAPH_SUPPRESS_DEPRECATED_START
manager.register_pass<ngraph::pass::LowLatency>();
NGRAPH_SUPPRESS_DEPRECATED_END
// LLT v2 doesn't insert Assign/ReadValue ops here (they were already inserted by v1),
// it only unrolls the TI/Loop
manager.register_pass<pass::LowLatency2>();
EXPECT_NO_THROW(manager.run_passes(f));
}
}

View File

@@ -0,0 +1,205 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <string>
#include <memory>
#include <ngraph/function.hpp>
#include <ngraph/opsets/opset7.hpp>
#include <ngraph/pass/manager.hpp>
#include <transformations/common_optimizations/split_squeeze_concat_fusion.hpp>
#include <transformations/init_node_info.hpp>
#include <transformations/utils/utils.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
using namespace testing;
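
// Split(axis=2, 4 parts) + per-output Squeeze(axis=2) + Concat(axis=4) is equivalent to
// moving the split axis next to the concat axis and folding both with a single Reshape:
// Transpose{0,1,3,4,2,5} turns {1,2,4,640,20,2} into {1,2,640,20,4,2}, which is then
// reshaped to {1,2,640,20,8} -- exactly what the fused reference function builds.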
TEST(TransformationTests, SplitSqueezeConcatFusion) {
size_t num_splits = 4;
std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
{
auto input = std::make_shared<ngraph::opset7::Parameter>(ngraph::element::f32, ngraph::Shape{ 1, 2, num_splits, 640, 20, 2 });
auto split_axis = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{}, { 2 });
auto split = std::make_shared<ngraph::opset7::Split>(input, split_axis, num_splits);
ngraph::OutputVector squeeze_vec(num_splits);
for (size_t i = 0; i < squeeze_vec.size(); i++) {
auto squeeze_axis = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{ 1 }, { 2 });
squeeze_vec[i] = std::make_shared<ngraph::opset7::Squeeze>(split->output(i), squeeze_axis)->output(0);
}
auto concat = std::make_shared<ngraph::opset7::Concat>(squeeze_vec, 4);
f = std::make_shared<ngraph::Function>(ngraph::NodeVector{ concat }, ngraph::ParameterVector{ input });
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
manager.register_pass<ngraph::pass::SplitSqueezeConcatFusion>();
manager.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto input = std::make_shared<ngraph::opset7::Parameter>(ngraph::element::f32, ngraph::Shape{ 1, 2, num_splits, 640, 20, 2 });
auto transpose_order = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{ 6 }, { 0, 1, 3, 4, 2, 5 });
auto transpose = std::make_shared<ngraph::opset7::Transpose>(input, transpose_order);
auto reshape_shape = ngraph::opset7::Constant::create<int64_t>(ngraph::element::i64, ngraph::Shape{ 5 },
{ 1, 2, 640, 20, 2 * (int64_t)num_splits });
auto reshape = std::make_shared<ngraph::opset7::Reshape>(transpose, reshape_shape, false);
f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{ reshape }, ngraph::ParameterVector{ input });
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
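
// Negative case: the Concat consumes only num_splits - 1 of the Split outputs,
// so the fusion must not apply and the function stays unchanged.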
TEST(TransformationTests, SplitSqueezeConcatFusionNegativeCaseNotAllSplitOutputsGoToSqueeze) {
size_t num_splits = 4;
std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
{
auto input = std::make_shared<ngraph::opset7::Parameter>(ngraph::element::f32, ngraph::Shape{ 1, 2, num_splits, 640, 20, 2 });
auto split_axis = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{}, { 2 });
auto split = std::make_shared<ngraph::opset7::Split>(input, split_axis, num_splits);
ngraph::OutputVector squeeze_vec(num_splits - 1);
for (size_t i = 0; i < squeeze_vec.size(); i++) {
auto squeeze_axis = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{ 1 }, { 2 });
squeeze_vec[i] = std::make_shared<ngraph::opset7::Squeeze>(split->output(i), squeeze_axis)->output(0);
}
auto concat = std::make_shared<ngraph::opset7::Concat>(squeeze_vec, 4);
f = std::make_shared<ngraph::Function>(ngraph::NodeVector{ concat }, ngraph::ParameterVector{ input });
f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{ concat }, ngraph::ParameterVector{ input });
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
manager.register_pass<ngraph::pass::SplitSqueezeConcatFusion>();
manager.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto input = std::make_shared<ngraph::opset7::Parameter>(ngraph::element::f32, ngraph::Shape{ 1, 2, num_splits, 640, 20, 2 });
auto split_axis = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{}, { 2 });
auto split = std::make_shared<ngraph::opset7::Split>(input, split_axis, num_splits);
ngraph::OutputVector squeeze_vec(num_splits - 1);
for (size_t i = 0; i < squeeze_vec.size(); i++) {
auto squeeze_axis = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{ 1 }, { 2 });
squeeze_vec[i] = std::make_shared<ngraph::opset7::Squeeze>(split->output(i), squeeze_axis)->output(0);
}
auto concat = std::make_shared<ngraph::opset7::Concat>(squeeze_vec, 4);
f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{ concat }, ngraph::ParameterVector{ input });
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
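
// Negative case: the Split outputs reach the Concat out of order (outputs 1 and 2
// are swapped), so the pattern does not match and the function stays unchanged.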
TEST(TransformationTests, SplitSqueezeConcatFusionNegativeCaseSplitOutputsGoInDifferentOrder) {
size_t num_splits = 4;
std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
{
auto input = std::make_shared<ngraph::opset7::Parameter>(ngraph::element::f32, ngraph::Shape{ 1, 2, num_splits, 640, 20, 2 });
auto split_axis = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{}, { 2 });
auto split = std::make_shared<ngraph::opset7::Split>(input, split_axis, num_splits);
ngraph::OutputVector squeeze_vec(num_splits);
for (size_t i = 0; i < squeeze_vec.size(); i++) {
auto squeeze_axis = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{ 1 }, { 2 });
squeeze_vec[i] = std::make_shared<ngraph::opset7::Squeeze>(split->output(i), squeeze_axis)->output(0);
}
std::swap(squeeze_vec[1], squeeze_vec[2]);
auto concat = std::make_shared<ngraph::opset7::Concat>(squeeze_vec, 4);
f = std::make_shared<ngraph::Function>(ngraph::NodeVector{ concat }, ngraph::ParameterVector{ input });
f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{ concat }, ngraph::ParameterVector{ input });
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
manager.register_pass<ngraph::pass::SplitSqueezeConcatFusion>();
manager.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto input = std::make_shared<ngraph::opset7::Parameter>(ngraph::element::f32, ngraph::Shape{ 1, 2, num_splits, 640, 20, 2 });
auto split_axis = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{}, { 2 });
auto split = std::make_shared<ngraph::opset7::Split>(input, split_axis, num_splits);
ngraph::OutputVector squeeze_vec(num_splits);
for (size_t i = 0; i < squeeze_vec.size(); i++) {
auto squeeze_axis = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{ 1 }, { 2 });
squeeze_vec[i] = std::make_shared<ngraph::opset7::Squeeze>(split->output(i), squeeze_axis)->output(0);
}
std::swap(squeeze_vec[1], squeeze_vec[2]);
auto concat = std::make_shared<ngraph::opset7::Concat>(squeeze_vec, 4);
f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{ concat }, ngraph::ParameterVector{ input });
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
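
// Negative case: the Squeeze axis (0) differs from the Split axis (2), so the
// fusion must not apply and the function stays unchanged.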
TEST(TransformationTests, SplitSqueezeConcatFusionNegativeCaseSplitAxisDifferentFromSqueezeAxis) {
size_t num_splits = 4;
std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
{
auto input = std::make_shared<ngraph::opset7::Parameter>(ngraph::element::f32, ngraph::Shape{ 1, 2, num_splits, 640, 20, 2 });
auto split_axis = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{}, { 2 });
auto split = std::make_shared<ngraph::opset7::Split>(input, split_axis, num_splits);
ngraph::OutputVector squeeze_vec(num_splits);
for (size_t i = 0; i < squeeze_vec.size(); i++) {
auto squeeze_axis = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{ 1 }, { 0 });
squeeze_vec[i] = std::make_shared<ngraph::opset7::Squeeze>(split->output(i), squeeze_axis)->output(0);
}
auto concat = std::make_shared<ngraph::opset7::Concat>(squeeze_vec, 4);
f = std::make_shared<ngraph::Function>(ngraph::NodeVector{ concat }, ngraph::ParameterVector{ input });
f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{ concat }, ngraph::ParameterVector{ input });
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
manager.register_pass<ngraph::pass::SplitSqueezeConcatFusion>();
manager.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto input = std::make_shared<ngraph::opset7::Parameter>(ngraph::element::f32, ngraph::Shape{ 1, 2, num_splits, 640, 20, 2 });
auto split_axis = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{}, { 2 });
auto split = std::make_shared<ngraph::opset7::Split>(input, split_axis, num_splits);
ngraph::OutputVector squeeze_vec(num_splits);
for (size_t i = 0; i < squeeze_vec.size(); i++) {
auto squeeze_axis = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{ 1 }, { 0 });
squeeze_vec[i] = std::make_shared<ngraph::opset7::Squeeze>(split->output(i), squeeze_axis)->output(0);
}
auto concat = std::make_shared<ngraph::opset7::Concat>(squeeze_vec, 4);
f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{ concat }, ngraph::ParameterVector{ input });
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}

View File

@@ -201,3 +201,68 @@ INSTANTIATE_TEST_CASE_P(TransposeSinkingSqueeze, TransposeSinking, testing::Comb
testing::Values(
ngraph::opset6::Squeeze::type_info)));
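
// Two consecutive Transposes whose orders compose to the identity
// ({0,2,3,4,1} followed by {0,4,1,2,3}) are eliminated entirely.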
TEST(TransformationTests, TransposeFuseEliminatesTranspose) {
std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
{
auto input = std::make_shared<ngraph::opset6::Parameter>(ngraph::element::f32, ngraph::Shape{ 1, 2, 640, 20, 2 });
auto tr1_order = ngraph::opset6::Constant::create(ngraph::element::i64, ngraph::Shape{ 5 }, { 0, 2, 3, 4, 1 });
auto transpose1 = std::make_shared<ngraph::opset6::Transpose>(input, tr1_order);
auto tr2_order = ngraph::opset6::Constant::create(ngraph::element::i64, ngraph::Shape{ 5 }, { 0, 4, 1, 2, 3 });
auto transpose2 = std::make_shared<ngraph::opset6::Transpose>(transpose1, tr2_order);
auto add_const = ngraph::opset6::Constant::create(ngraph::element::f32, ngraph::Shape{ 1 }, { 1 });
auto add = std::make_shared<ngraph::opset6::Add>(transpose2, add_const);
f = std::make_shared<ngraph::Function>(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input });
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
manager.register_pass<ngraph::pass::TransposeFuse>();
manager.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto input = std::make_shared<ngraph::opset6::Parameter>(ngraph::element::f32, ngraph::Shape{ 1, 2, 640, 20, 2 });
auto add_const = ngraph::opset6::Constant::create(ngraph::element::f32, ngraph::Shape{ 1 }, { 1 });
auto add = std::make_shared<ngraph::opset6::Add>(input, add_const);
f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input });
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
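
// Two consecutive Transposes fuse into one whose order is the composition
// fused[i] = tr1_order[tr2_order[i]], here {0,5,2,3,1,4}.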
TEST(TransformationTests, TransposeFuses) {
std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
{
auto input = std::make_shared<ngraph::opset6::Parameter>(ngraph::element::f32, ngraph::Shape{ 1, 2, 640, 20, 2, 2 });
auto tr1_order = ngraph::opset6::Constant::create(ngraph::element::i64, ngraph::Shape{ 6 }, { 0, 5, 1, 2, 3, 4 });
auto transpose1 = std::make_shared<ngraph::opset6::Transpose>(input, tr1_order);
auto tr2_order = ngraph::opset6::Constant::create(ngraph::element::i64, ngraph::Shape{ 6 }, { 0, 1, 3, 4, 2, 5 });
auto transpose2 = std::make_shared<ngraph::opset6::Transpose>(transpose1, tr2_order);
auto add_const = ngraph::opset6::Constant::create(ngraph::element::f32, ngraph::Shape{ 1 }, { 1 });
auto add = std::make_shared<ngraph::opset6::Add>(transpose2, add_const);
f = std::make_shared<ngraph::Function>(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input });
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
manager.register_pass<ngraph::pass::TransposeFuse>();
manager.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto input = std::make_shared<ngraph::opset6::Parameter>(ngraph::element::f32, ngraph::Shape{ 1, 2, 640, 20, 2, 2 });
auto tr_order = ngraph::opset6::Constant::create(ngraph::element::i64, ngraph::Shape{ 6 }, { 0, 5, 2, 3, 1, 4 });
auto transpose = std::make_shared<ngraph::opset6::Transpose>(input, tr_order);
auto add_const = ngraph::opset6::Constant::create(ngraph::element::f32, ngraph::Shape{ 1 }, { 1 });
auto add = std::make_shared<ngraph::opset6::Add>(transpose, add_const);
f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input });
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}

Some files were not shown because too many files have changed in this diff