From 87c1a3f70f5c96e448c02acbd1c7584bbf0d1bf4 Mon Sep 17 00:00:00 2001 From: Bryan Weber Date: Sun, 11 Apr 2021 22:54:03 -0400 Subject: [PATCH] Refactor CSV and profile tests to better use NumPy --- site_scons/buildutils.py | 176 ++++++++++++++++++++++----------------- 1 file changed, 100 insertions(+), 76 deletions(-) diff --git a/site_scons/buildutils.py b/site_scons/buildutils.py index a5a675335..107d5b169 100644 --- a/site_scons/buildutils.py +++ b/site_scons/buildutils.py @@ -350,65 +350,84 @@ def compare_profiles(env, ref_file, sample_file): After interpolation, each data point must satisfy a combined relative and absolute error criterion specified by `rtol` and `atol`. """ - atol = env['test_csv_threshold'] - rtol = env['test_csv_tolerance'] - xtol = env['test_csv_tolerance'] - if not np: - print('WARNING: skipping profile comparison because numpy is not available') - return 0 + build_logger.warning("Skipping profile comparison because numpy is not available") + return TestResult.PASS - reference = np.genfromtxt(ref_file, delimiter=',').T - sample = np.genfromtxt(sample_file, delimiter=',').T - assert reference.shape[0] == sample.shape[0] + atol = env["test_csv_threshold"] + rtol = env["test_csv_tolerance"] + xtol = env["test_csv_tolerance"] - # trim header rows if present - if np.isnan(sample[0,0]) and np.isnan(reference[0,0]): + reference = np.genfromtxt(ref_file, delimiter=",").T + sample = np.genfromtxt(sample_file, delimiter=",").T + if reference.shape[0] != sample.shape[0]: + build_logger.error( + "The output array does not have the same number of variabls as the " + "reference array." + ) + return TestResult.FAIL + + # trim header columns if present + if np.isnan(sample[0, 0]) and np.isnan(reference[0, 0]): reference = reference[:, 1:] sample = sample[:, 1:] - assert not np.isnan(reference).any() - assert not np.isnan(sample).any() + if np.isnan(reference).any() or np.isnan(sample).any(): + build_logger.error( + "The output array and reference array have different headers " + "or contain non-numeric data." + ) + return TestResult.FAIL - nVars = reference.shape[0] - nTimes = reference.shape[1] + n_vars = reference.shape[0] + n_times = reference.shape[1] bad = [] - template = '{0:9.4e} {1: 3d} {2:14.7e} {3:14.7e} {4:9.3e} {5:9.3e} {6:9.3e}' - for i in range(1, nVars): - scale = max(max(abs(reference[i])), reference[i].ptp(), - max(abs(sample[i])), sample[i].ptp()) - slope = np.zeros(nTimes) - slope[1:] = np.diff(reference[i]) / np.diff(reference[0]) * reference[0].ptp() + template = "{0:10.4e} {1:5d} {2:14.7e} {3:14.7e} {4:9.3e} {5:9.3e} {6:9.3e}" + header = ["Failed series comparisons:"] + header.append("{:10s} {:5s} {:14s} {:14s} {:9s} {:9s} {:9s}".format( + "coordinate", "comp.", "reference val.", "test value", "abs. err", "rel. err", + "pos. err" + )) + header.append(f"{10*'-'} ----- {14*'-'} {14*'-'} {9*'-'} {9*'-'} {9*'-'}") + ref_ptp = reference.ptp(axis=1) + ref_max = np.abs(reference).max(axis=1) + sample_ptp = sample.ptp(axis=1) + sample_max = np.abs(sample).max(axis=1) + scale = np.maximum( + np.maximum(ref_ptp[1:], ref_max[1:]), + np.maximum(sample_ptp[1:], sample_max[1:]) + ).reshape(n_vars - 1, -1) + ref_diff = np.diff(reference) + slope = ref_diff[1:, :] / ref_diff[0, :] * ref_ptp[0] + slope = np.hstack((np.zeros((n_vars - 1, 1)), slope)) + comp = np.zeros((n_vars - 1, n_times)) + for i, row in enumerate(sample[1:]): + comp[i, :] = np.interp(reference[0, :], sample[0, :], row) - comp = np.interp(reference[0], sample[0], sample[i]) - for j in range(nTimes): - a = reference[i,j] - b = comp[j] - abserr = abs(a-b) - relerr = abs(a-b) / (scale + atol) - - # error that can be accounted for by shifting the profile along - # the time / spatial coordinate - xerr = abserr / (abs(slope[j]) + atol) - - if abserr > atol and relerr > rtol and xerr > xtol: - bad.append((reference[0][j], i, a, b, abserr, relerr, xerr)) + abserr = np.abs(reference[1:] - comp) + relerr = abserr / (scale + atol) + # error that can be accounted for by shifting the profile along + # the time / spatial coordinate + xerr = abserr / (np.abs(slope) + atol) + if np.any(abserr > atol) and np.any(relerr > rtol) and np.any(xerr > xtol): + it = np.nditer((abserr, relerr, xerr), flags=["multi_index"]) + for a, r, x in it: + i, j = it.multi_index + bad.append((reference[0, j], i, reference[i,j], comp[i,j], a, r, x)) + # Fix line lengths here footer = [] maxrows = 10 if len(bad) > maxrows: bad.sort(key=lambda row: -row[5]) - footer += ['Plus {0} more points exceeding error thresholds.'.format(len(bad)-maxrows)] + footer += [f"Plus {len(bad) - maxrows} more points exceeding error thresholds."] bad = bad[:maxrows] if bad: - header = ['Failed series comparisons:', - 'coordinate comp. reference val test value abs. err rel. err pos. err', - '---------- --- -------------- -------------- --------- --------- ---------'] - print('\n'.join(header + [template.format(*row) for row in bad] + footer)) - return 1 + output_logger.error("\n".join(header + [template.format(*row) for row in bad] + footer)) + return TestResult.FAIL else: - return 0 + return TestResult.PASS def compare_csv_files(env, file1: Path, file2: Path): @@ -428,53 +447,58 @@ def compare_csv_files(env, file1: Path, file2: Path): files are dissimilar. Lines containing non-numeric data are automatically ignored. """ - try: - import numpy as np - except ImportError: - print('WARNING: skipping .csv diff because numpy is not installed') - return 0 + if not np: + build_logger.warning("Skipping profile comparison because numpy is not available") + return TestResult.PASS # decide how many header lines to skip - f = open(file1) - headerRows = 0 - goodChars = set('0123456789.+-eE, ') + f = Path(file1).read_text().split("\n") + header_rows = 0 + good_chars = set("0123456789.+-eE, ") for line in f: - if not set(line[:-1]).issubset(goodChars): - headerRows += 1 + if not set(line).issubset(good_chars): + header_rows += 1 else: break try: - data1 = np.genfromtxt(file1, skip_header=headerRows, delimiter=',') - data2 = np.genfromtxt(file2, skip_header=headerRows, delimiter=',') + data1 = np.genfromtxt(file1, skip_header=header_rows, delimiter=",") + data2 = np.genfromtxt(file2, skip_header=header_rows, delimiter=",") except (IOError, StopIteration) as e: - print(e) - return 1 + build_logger.error(f"Could not read data files: {file1}; {file2}", exc_info=e) + return TestResult.FAIL + threshold = env["test_csv_threshold"] try: - relerror = (np.abs(data2-data1) / - (np.maximum(np.abs(data2), np.abs(data1)) + - env['test_csv_threshold'])) + denom = np.maximum(np.abs(data2), np.abs(data1)) + threshold + relerror = np.abs(data2 - data1) / denom maxerror = np.nanmax(relerror.flat) - except ValueError as e: - print(e) - return 1 + except (ValueError, TypeError) as e: + build_logger.error("Could not compute error.", exc_info=e) + return TestResult.FAIL - tol = env['test_csv_tolerance'] - if maxerror > tol: # Threshold based on printing 6 digits in the CSV file - print("Files differ. %i / %i elements above specified tolerance (%f)" % - (np.sum(relerror > tol), relerror.size, tol)) - print(' row col reference test rel. error') - print(' ---- ---- -------------- -------------- ----------') - for i,j in itertools.product(*map(range, relerror.shape)): - if relerror[i,j] > tol: - row = i + headerRows + 1 - col = j + 1 - print(' % 4i % 4i % 14.7e % 14.7e % 10.4e' % - (row, col, data1[i,j], data2[i,j], relerror[i,j])) - return 1 - else: - return 0 + tol = env["test_csv_tolerance"] + if maxerror < tol: # Threshold based on printing 6 digits in the CSV file + return TestResult.PASS + + n_fail = np.sum(relerror > tol) + n_tot = relerror.size + message = [ + "Files differ.", + f"{n_fail:d} / {n_tot:d} elements above specified tolerance ({tol:f})", + " row col reference test rel. error", + " ---- ---- -------------- -------------- ----------", + ] + it = np.nditer([relerror, data1, data2], flags=["multi_index"]) + for rele, ref, test in it: + if rele > tol: + r = it.multi_index[0] + header_rows + 1 + c = it.multi_index[1] + 1 + message.append( + f" {r:4d} {c:4d} {ref:14.7e} {test:14.7e} {rele:10.4e}" + ) + build_logger.error("\n".join(message)) + return TestResult.FAIL def regression_test_message(target, source, env):