Files
openvino/model-optimizer/mo/utils/simple_proto_parser.py
Alexey Suhov 6478f1742a Align copyright notice in python scripts (CVS-51320) (#4974)
* Align copyright notice in python scripts (CVS-51320)
2021-03-26 17:54:28 +03:00

197 lines
7.7 KiB
Python

# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import ast
import logging as log
import os
from mo.utils.error import Error
class SimpleProtoParser(object):
    """
    This is a simple Proto2 parser that has limited functionality and is intended to parse configuration files for the
    models created with Object Detection API only. The result of the parser is the dictionary.

    Known limitations:
    * a nested message that is repeated under the same name keeps only its last occurrence;
    * quoted and unquoted values are not distinguished after tokenization, so a quoted "true" or "1" is converted
      to bool/int exactly like the unquoted form;
    * only whole-line comments (lines starting with '#') are recognized.
    """

    # Class-level defaults are kept for backward compatibility only; every instance gets its own containers.
    _tokens = list()
    _result = dict()

    def __init__(self):
        self._tokens = list()
        self._result = dict()

    @staticmethod
    def _convert_value_to_correct_datatype(value: str):
        """
        Converts string representation of the token to a value with proper data type.
        :param value: string representation to be converted.
        :return: converted to a correct data type value.
        """
        if value == 'true':
            return True
        if value == 'false':
            return False
        try:
            return ast.literal_eval(value)
        except Exception:  # if it is not possible to evaluate the value then consider it as a string
            return value

    @staticmethod
    def _convert_values_to_correct_datatypes(d: dict):
        """
        Convert dictionary with values to correct data types. The dictionary is updated in place.
        :param d: dictionary with values.
        :return: None
        """
        convert = SimpleProtoParser._convert_value_to_correct_datatype
        # only values of existing keys are replaced, so the dictionary size does not change during the iteration
        for key, value in d.items():
            if isinstance(value, dict):
                SimpleProtoParser._convert_values_to_correct_datatypes(value)
            elif isinstance(value, list):
                d[key] = [convert(item) for item in value]
            else:
                d[key] = convert(value)

    def _add_non_empty_token(self, token: str):
        """
        Add token to the list of tokens if it is non-empty.
        :param token: token to add
        :return: None
        """
        if token != "":
            self._tokens.append(token)

    def _parse_list(self, result: list, token_ind: int):
        """
        Parses the items of a list "[x, y, z...]" starting right after the opening bracket.
        :param result: list where to store the (still string) items.
        :param token_ind: index of the first token after '['.
        :return: index of the token following the closing ']'.
        :raises Error: if a value is missing before a comma or the closing bracket is never found.
        """
        prev_token = '['
        while token_ind < len(self._tokens):
            cur_token = self._tokens[token_ind]
            if cur_token == ']':
                return token_ind + 1
            if cur_token == ',':
                if prev_token in (',', '['):
                    raise Error('Missing value in the list at position {}'.format(token_ind))
            else:
                result.append(cur_token)
            token_ind += 1
            prev_token = cur_token
        # the tokens ended before the list was closed
        raise Error('Missing closing bracket "]" of the list.')

    def _parse_tokens(self, result: dict, token_ind: int, depth: int=0):
        """
        Internal function that parses tokens.
        :param result: current dictionary where to store parse result.
        :param token_ind: index of the token from the tokens list to start parsing from.
        :param depth: nesting level of the message being parsed; 0 for the top level.
        :return: token index to continue parsing from.
        :raises Error: on unbalanced braces, an unexpected token or a premature end of the input.
        """
        tokens = self._tokens
        tokens_count = len(tokens)
        while token_ind < tokens_count:
            cur_token = tokens[token_ind]
            if cur_token == ',':  # redundant commas that we simply ignore everywhere except list "[x, y, z...]"
                token_ind += 1
                continue
            if cur_token == '}':
                if depth == 0:
                    # closing brace without a matching opening one; do not silently drop the rest of the input
                    raise Error('Input/output braces mismatch.')
                return token_ind + 1
            if token_ind + 1 >= tokens_count:
                raise Error('Unexpected end of input after "{}" in position {}'.format(cur_token, token_ind))
            next_token = tokens[token_ind + 1]
            if next_token == '{':
                result[cur_token] = dict()
                token_ind = self._parse_tokens(result[cur_token], token_ind + 2, depth + 1)
            elif next_token == ':':
                if token_ind + 2 >= tokens_count:
                    raise Error('Missing value for "{}" in position {}'.format(cur_token, token_ind))
                value_token = tokens[token_ind + 2]
                if value_token == '[':
                    result[cur_token] = list()
                    token_ind = self._parse_list(result[cur_token], token_ind + 3)
                elif value_token == '{':
                    # the text format allows an optional colon before a nested message: "key: { ... }"
                    result[cur_token] = dict()
                    token_ind = self._parse_tokens(result[cur_token], token_ind + 3, depth + 1)
                else:
                    if cur_token not in result:
                        result[cur_token] = value_token
                    else:
                        # repeated scalar field: collect all the values into a list
                        if not isinstance(result[cur_token], list):
                            result[cur_token] = [result[cur_token]]
                        result[cur_token].append(value_token)
                    token_ind += 3
            else:
                raise Error('Wrong character "{}" in position {}'.format(next_token, token_ind))
        if depth != 0:
            raise Error('Input/output braces mismatch.')
        return token_ind

    def _convert_tokens_to_dict(self):
        """
        Convert list of tokens into a dictionary with proper structure.
        Then converts values in the dictionary to values of correct data types. For example, 'false' -> False,
        'true' -> True, '0.004' -> 0.004, etc.
        :return: True if conversion is successful.
        """
        try:
            self._parse_tokens(self._result, 0)
        except Exception as ex:
            log.error('Failed to convert tokens to dictionary: {}'.format(str(ex)))
            return False
        self._convert_values_to_correct_datatypes(self._result)
        return True

    def _split_to_tokens(self, file_content: str):
        """
        The function gets file content as string and converts it to the list of tokens (all tokens are still strings).
        The tokens are appended to the internal list of tokens.
        :param file_content: file content as a string
        :return: None
        """
        delimiters = (':', '{', '}', '[', ']', ',')
        string_started = False
        for line in file_content.split('\n'):
            cur_token = ''
            # True when the string currently being read was opened on this very line
            string_opened_on_line = False
            line = line.strip()
            if line.startswith('#'):  # skip comments
                continue
            for char in line:
                if string_started:
                    if char == '"':  # string ended
                        # An empty string "" is a valid value and must produce a token. A string left open on a
                        # previous line has already been flushed there, so its empty remainder is not a token.
                        if cur_token != '' or string_opened_on_line:
                            self._tokens.append(cur_token)
                        cur_token = ''
                        string_started = False
                    else:
                        cur_token += char
                elif char == '"':
                    self._add_non_empty_token(cur_token)
                    cur_token = ''  # start of a new string
                    string_started = True
                    string_opened_on_line = True
                elif char.isspace():  # any whitespace (not only a space) separates tokens outside of strings
                    self._add_non_empty_token(cur_token)
                    cur_token = ''
                elif char in delimiters:
                    self._add_non_empty_token(cur_token)
                    self._tokens.append(char)
                    cur_token = ''
                else:
                    cur_token += char
            # flush the last token of the line exactly once
            self._add_non_empty_token(cur_token)

    def parse_from_string(self, file_content: str):
        """
        Parses the proto text file passed as a string.
        :param file_content: content of the file.
        :return: dictionary with file content or None if the file cannot be parsed.
        """
        # start from a clean state so the parser instance can be reused; new containers are created (instead of
        # clearing the old ones) to keep the dictionary returned by a previous call intact
        self._tokens = list()
        self._result = dict()
        self._split_to_tokens(file_content)
        if not self._convert_tokens_to_dict():
            log.error('Failed to generate dictionary representation of file.')
            return None
        return self._result

    def parse_file(self, file_name: str):
        """
        Parses the specified file and returns its representation as dictionary.
        :param file_name: file name to parse.
        :return: dictionary with file content or None if the file cannot be parsed.
        """
        if not os.path.exists(file_name):
            log.error('File {} does not exist'.format(file_name))
            return None
        try:
            with open(file_name) as file:
                file_content = file.read()
        except Exception as ex:
            log.error('Failed to read file {}: {}'.format(file_name, str(ex)))
            return None
        return self.parse_from_string(file_content)