mirror of
https://github.com/Cantera/cantera.git
synced 2025-02-25 18:55:29 -06:00
[ctml2yaml] Fix several XML parsing errors
The Python Expat parser requires the <?xml version...> declaration to be the very first characters in the file, with no preceding whitespace, so lstrip is used to remove any leading whitespace. In addition, raw & characters are replaced with their escaped form (&amp;).
This commit is contained in:
@@ -2122,7 +2122,20 @@ def create_phases_from_data_node(
|
||||
def convert(inpfile: Union[str, Path], outfile: Union[str, Path]):
|
||||
"""Convert an input CTML file to a YAML file."""
|
||||
inpfile = Path(inpfile)
|
||||
ctml_tree = etree.parse(str(inpfile)).getroot()
|
||||
ctml_text = inpfile.read_text().lstrip()
|
||||
# Replace any raw ampersands in the text with an escaped ampersand. This
|
||||
# substitution is necessary because ctml_writer outputs literal & characters
|
||||
# from text data into the XML output. Although this doesn't cause a problem
|
||||
# with the custom XML parser in Cantera, standards-compliant XML parsers
|
||||
# like the Expat one included in Python can't handle the raw & character. I
|
||||
# could not figure out a way to override the parsing logic such that & could
|
||||
# be escaped in the data during parsing, so it has to be done manually here.
|
||||
# According to https://stackoverflow.com/a/1091953 there are 5 escaped
|
||||
# characters in XML: " (&quot;), ' (&apos;), & (&amp;), < (&lt;), and >
|
||||
# (&gt;). This code only replaces & not followed by one of the escaped
|
||||
# character codes.
|
||||
ctml_text = re.sub("&(?!amp;|quot;|apos;|lt;|gt;)", "&amp;", ctml_text)
|
||||
ctml_tree = etree.fromstring(ctml_text)
|
||||
|
||||
species_data = create_species_from_data_node(ctml_tree)
|
||||
reaction_data = create_reactions_from_data_node(ctml_tree)
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<speciesArray datasrc="#species_data">
|
||||
(Parens) @#$%^-2 co:lons: [xy2]*{.}
|
||||
plus+ eq=uals plus trans_butene
|
||||
co</speciesArray>
|
||||
co amp&ersand</speciesArray>
|
||||
<reactionArray datasrc="#reaction_data"/>
|
||||
<state>
|
||||
<temperature units="K">300.0</temperature>
|
||||
@@ -175,6 +175,24 @@
|
||||
</NASA>
|
||||
</thermo>
|
||||
</species>
|
||||
|
||||
<!-- species amp&ersand -->
|
||||
<species name="amp&ersand">
|
||||
<atomArray>C:1 H:4 </atomArray>
|
||||
<note>Contains a raw & character</note>
|
||||
<thermo>
|
||||
<NASA Tmin="200.0" Tmax="1000.0" P0="100000.0">
|
||||
<floatArray size="7" name="coeffs">
|
||||
5.149876130E+00, -1.367097880E-02, 4.918005990E-05, -4.847430260E-08,
|
||||
1.666939560E-11, -1.024664760E+04, -4.641303760E+00</floatArray>
|
||||
</NASA>
|
||||
<NASA Tmin="1000.0" Tmax="3500.0" P0="100000.0">
|
||||
<floatArray size="7" name="coeffs">
|
||||
7.485149500E-02, 1.339094670E-02, -5.732858090E-06, 1.222925350E-09,
|
||||
-1.018152300E-13, -9.468344590E+03, 1.843731800E+01</floatArray>
|
||||
</NASA>
|
||||
</thermo>
|
||||
</species>
|
||||
</speciesData>
|
||||
<reactionData id="reaction_data">
|
||||
|
||||
@@ -366,5 +384,19 @@
|
||||
<reactants>co:1.0 co:lons::1</reactants>
|
||||
<products>plus+:2.0</products>
|
||||
</reaction>
|
||||
|
||||
<!-- reaction 0013 -->
|
||||
<reaction id="0013" reversible="yes">
|
||||
<equation>amp&ersand [=] plus+</equation>
|
||||
<rateCoeff>
|
||||
<Arrhenius>
|
||||
<A>9.999000E+06</A>
|
||||
<b>9.9</b>
|
||||
<E units="cal/mol">999.900000</E>
|
||||
</Arrhenius>
|
||||
</rateCoeff>
|
||||
<reactants>amp&ersand:1.0</reactants>
|
||||
<products>plus+:1.0</products>
|
||||
</reaction>
|
||||
</reactionData>
|
||||
</ctml>
|
||||
|
||||
Reference in New Issue
Block a user