[ctml2yaml] Fix several XML parsing errors

The Python Expat parser requires that the <?xml version...?> declaration
occur as the very first characters in the file, with no whitespace before
it, so lstrip is used to remove any leading whitespace. In addition, raw &
characters are replaced with their escaped version (&amp;).
This commit is contained in:
Bryan W. Weber
2019-11-17 21:17:20 -05:00
parent 66c1433101
commit dfd4e6d4bf
2 changed files with 47 additions and 2 deletions

View File

@@ -2122,7 +2122,20 @@ def create_phases_from_data_node(
def convert(inpfile: Union[str, Path], outfile: Union[str, Path]):
"""Convert an input CTML file to a YAML file."""
inpfile = Path(inpfile)
ctml_tree = etree.parse(str(inpfile)).getroot()
ctml_text = inpfile.read_text().lstrip()
# Replace any raw ampersands in the text with an escaped ampersand. This
# substitution is necessary because ctml_writer outputs literal & characters
# from text data into the XML output. Although this doesn't cause a problem
# with the custom XML parser in Cantera, standards-compliant XML parsers
# like the Expat one included in Python can't handle the raw & character. I
# could not figure out a way to override the parsing logic such that & could
# be escaped in the data during parsing, so it has to be done manually here.
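# According to https://stackoverflow.com/a/1091953 there are 5 predefined
# escape sequences in XML: " (&quot;), ' (&apos;), & (&amp;), < (&lt;), and >
# (&gt;). This code only replaces an & that is not already the start of one
# of these five sequences. Note that numeric character references such as
# &#38; are not recognized by this pattern, so their & is escaped as well.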
ctml_text = re.sub("&(?!amp;|quot;|apos;|lt;|gt;)", "&amp;", ctml_text)
ctml_tree = etree.fromstring(ctml_text)
species_data = create_species_from_data_node(ctml_tree)
reaction_data = create_reactions_from_data_node(ctml_tree)

View File

@@ -8,7 +8,7 @@
<speciesArray datasrc="#species_data">
(Parens) @#$%^-2 co:lons: [xy2]*{.}
plus+ eq=uals plus trans_butene
co</speciesArray>
co amp&ersand</speciesArray>
<reactionArray datasrc="#reaction_data"/>
<state>
<temperature units="K">300.0</temperature>
@@ -175,6 +175,24 @@
</NASA>
</thermo>
</species>
<!-- species amp&ersand -->
<species name="amp&ersand">
<atomArray>C:1 H:4 </atomArray>
<note>Contains a raw & character</note>
<thermo>
<NASA Tmin="200.0" Tmax="1000.0" P0="100000.0">
<floatArray size="7" name="coeffs">
5.149876130E+00, -1.367097880E-02, 4.918005990E-05, -4.847430260E-08,
1.666939560E-11, -1.024664760E+04, -4.641303760E+00</floatArray>
</NASA>
<NASA Tmin="1000.0" Tmax="3500.0" P0="100000.0">
<floatArray size="7" name="coeffs">
7.485149500E-02, 1.339094670E-02, -5.732858090E-06, 1.222925350E-09,
-1.018152300E-13, -9.468344590E+03, 1.843731800E+01</floatArray>
</NASA>
</thermo>
</species>
</speciesData>
<reactionData id="reaction_data">
@@ -366,5 +384,19 @@
<reactants>co:1.0 co:lons::1</reactants>
<products>plus+:2.0</products>
</reaction>
<!-- reaction 0013 -->
<reaction id="0013" reversible="yes">
<equation>amp&ersand [=] plus+</equation>
<rateCoeff>
<Arrhenius>
<A>9.999000E+06</A>
<b>9.9</b>
<E units="cal/mol">999.900000</E>
</Arrhenius>
</rateCoeff>
<reactants>amp&ersand:1.0</reactants>
<products>plus+:1.0</products>
</reaction>
</reactionData>
</ctml>