Spreadsheet: Convert to DataFrame (#67170)

This commit is contained in:
Ryan McKinley 2023-04-25 07:47:22 -07:00 committed by GitHub
parent 39a3d85514
commit 93348c2a17
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 351 additions and 8 deletions

View File

@ -0,0 +1,183 @@
import { utils } from 'xlsx';
import { DataFrame } from '@grafana/data';
import { workSheetToFrame } from './sheet';
// Snapshot tests for workSheetToFrame. Each case builds a worksheet from an array-of-arrays
// with utils.aoa_to_sheet and snapshots the name, type and values of every resulting field.
describe('sheets', () => {
// The first row holds only strings, so the snapshot shows those strings used as the field names.
// Date.UTC(...) produces plain epoch-millisecond numbers, so the "Date" column is snapshotted
// as a number field. The object-literal cells come out as a string field of undefined values.
it('will use first row as names', () => {
const sheet = utils.aoa_to_sheet([
['Number', 'String', 'Bool', 'Date', 'Object'],
[1, 'A', true, Date.UTC(2020, 1, 1), { hello: 'world' }],
[2, 'B', false, Date.UTC(2020, 1, 2), { hello: 'world' }],
]);
const frame = workSheetToFrame(sheet);
expect(toSnapshotFrame(frame)).toMatchInlineSnapshot(`
[
{
"name": "Number",
"type": "number",
"values": [
1,
2,
],
},
{
"name": "String",
"type": "string",
"values": [
"A",
"B",
],
},
{
"name": "Bool",
"type": "boolean",
"values": [
true,
false,
],
},
{
"name": "Date",
"type": "number",
"values": [
1580515200000,
1580601600000,
],
},
{
"name": "Object",
"type": "string",
"values": [
undefined,
undefined,
],
},
]
`);
});
// No header row here: the first row already contains non-string cells, so every row is kept
// as data and the snapshot shows the fields named by column letter ("A" through "E").
it('will use calculated data when cells are typed', () => {
const sheet = utils.aoa_to_sheet([
[1, 'A', true, Date.UTC(2020, 1, 1), { hello: 'world' }],
[2, 'B', false, Date.UTC(2020, 1, 2), { hello: 'world' }],
[3, 'C', true, Date.UTC(2020, 1, 3), { hello: 'world' }],
]);
const frame = workSheetToFrame(sheet);
expect(toSnapshotFrame(frame)).toMatchInlineSnapshot(`
[
{
"name": "A",
"type": "number",
"values": [
1,
2,
3,
],
},
{
"name": "B",
"type": "string",
"values": [
"A",
"B",
"C",
],
},
{
"name": "C",
"type": "boolean",
"values": [
true,
false,
true,
],
},
{
"name": "D",
"type": "number",
"values": [
1580515200000,
1580601600000,
1580688000000,
],
},
{
"name": "E",
"type": "string",
"values": [
undefined,
undefined,
undefined,
],
},
]
`);
});
// Rows have different lengths and contain null/undefined entries. The snapshot shows every
// field still has one value per row, with undefined wherever the input had no usable value,
// and the field types still match the values that are present.
it('is OK with nulls and undefineds, and misalignment', () => {
const sheet = utils.aoa_to_sheet([
[null, 'A', true],
[2, 'B', null, Date.UTC(2020, 1, 2), { hello: 'world' }],
[3, 'C', true, undefined, { hello: 'world' }],
]);
const frame = workSheetToFrame(sheet);
expect(toSnapshotFrame(frame)).toMatchInlineSnapshot(`
[
{
"name": "A",
"type": "number",
"values": [
undefined,
2,
3,
],
},
{
"name": "B",
"type": "string",
"values": [
"A",
"B",
"C",
],
},
{
"name": "C",
"type": "boolean",
"values": [
true,
undefined,
true,
],
},
{
"name": "D",
"type": "number",
"values": [
undefined,
1580601600000,
undefined,
],
},
{
"name": "E",
"type": "string",
"values": [
undefined,
undefined,
undefined,
],
},
]
`);
});
});
/**
 * Reduces a frame to just the name, values and type of each field so that
 * inline snapshots only contain the parts these tests care about.
 */
function toSnapshotFrame(frame: DataFrame) {
  return frame.fields.map(({ name, values, type }) => {
    return { name, values, type };
  });
}

View File

@ -1,12 +1,172 @@
import { read, utils } from 'xlsx';
import { read, utils, WorkSheet, WorkBook, Range, ColInfo, CellObject, ExcelDataType } from 'xlsx';
import { ArrayDataFrame, DataFrame } from '@grafana/data';
import { DataFrame, FieldType } from '@grafana/data';
/**
 * Parses a spreadsheet file and converts each of its worksheets to a DataFrame.
 *
 * The bytes are handed to xlsx `read` with `type: 'buffer'`; the resulting
 * workbook is converted by `workBookToFrames`, which yields one frame per
 * entry in the workbook's `SheetNames`.
 *
 * @param file - raw contents of the spreadsheet file
 * @returns the frames produced for the workbook's sheets
 */
export function readSpreadsheet(file: ArrayBuffer): DataFrame[] {
  // Single conversion path: the previous body returned early from an
  // ArrayDataFrame/sheet_to_json mapping, leaving this call unreachable.
  const workbook = read(file, { type: 'buffer' });
  return workBookToFrames(workbook);
}
/**
 * Converts every sheet listed in the workbook's `SheetNames` to a DataFrame,
 * in that order, passing the sheet name along as the frame name.
 */
export function workBookToFrames(wb: WorkBook): DataFrame[] {
  const frames: DataFrame[] = [];
  for (const sheetName of wb.SheetNames) {
    frames.push(workSheetToFrame(wb.Sheets[sheetName], sheetName));
  }
  return frames;
}
/**
 * Converts a single worksheet to a DataFrame with one field per column
 * returned by `sheetAsColumns`.
 *
 * The column's detected cell type selects the field type and how each cell is
 * converted: 'b' -> boolean, 'n' -> number, 'd' -> time (numeric value of the
 * cell), anything else (including no detected type) -> string formatted with
 * `utils.format_cell`. Cells that are missing, or whose `v` is null/undefined,
 * are passed through as that null/undefined value.
 *
 * @param sheet - worksheet to convert
 * @param name - optional name assigned to the resulting frame
 * @returns the frame; it has no fields and length 0 when the sheet yields no columns
 */
export function workSheetToFrame(sheet: WorkSheet, name?: string): DataFrame {
  const columns = sheetAsColumns(sheet);
  if (columns == null || columns.length === 0) {
    return { fields: [], name, length: 0 };
  }

  return {
    fields: columns.map((column) => {
      const kind = column.type ?? 's';

      let type = FieldType.string;
      if (kind === 'b') {
        type = FieldType.boolean;
      } else if (kind === 'n') {
        type = FieldType.number;
      } else if (kind === 'd') {
        type = FieldType.time;
      }

      const values: unknown[] = column.data.map((cell) => {
        const raw = cell?.v;
        if (raw == null) {
          // Keep null/undefined as-is rather than coercing it.
          return raw;
        }
        if (kind === 'b') {
          return Boolean(raw);
        }
        if (kind === 'n' || kind === 'd') {
          return +raw;
        }
        return utils.format_cell(cell);
      });

      return {
        name: column.name,
        config: {}, // TODO? we could apply decimal formatting from worksheet
        type,
        values,
      };
    }),
    name,
    length: columns[0].data.length,
  };
}
/** One worksheet column as collected by sheetAsColumns, before it is turned into a frame field. */
interface ColumnData {
// Column number taken from the loop over the sheet's decoded `!ref` range.
index: number;
// Starts as the column letter (utils.encode_col); replaced by the formatted first-row cell
// when the first row is treated as a header and that cell has a truthy value.
name: string;
// The sheet's `!cols` entry for this column, or an empty object when there is none.
info?: ColInfo;
// One entry per row of the range (minus the header row when one is detected).
// Entries are undefined where the sheet has no cell, despite the declared element type.
data: CellObject[];
// Set only when the cells after the first row all report the same cell type `t`.
type?: ExcelDataType;
}
/**
 * Reads the cells of a worksheet column by column over its `!ref` range.
 *
 * Columns marked `hidden` in the sheet's `!cols` are skipped. When every cell
 * present in the first row of the range is a string cell, that row is treated
 * as a header: a truthy header cell supplies the column name and the row is
 * removed from the data. Otherwise the first row is ordinary data.
 *
 * A column gets a `type` only when all of its data cells report the same cell
 * type. The first row takes part in that check whenever it is data rather
 * than a header, so a column such as ['a', 1, 2] is left untyped instead of
 * being declared numeric on the strength of its later rows alone.
 *
 * @param sheet - worksheet to read
 * @returns the visible columns, or null when the sheet has no `!ref`
 */
function sheetAsColumns(sheet: WorkSheet): ColumnData[] | null {
  const ref = sheet['!ref'];
  if (!ref) {
    return null;
  }

  const columnInfo = sheet['!cols'];
  const range = safe_decode_range(ref);
  const scanned: Array<{ field: ColumnData; bodyTypes: Set<ExcelDataType> }> = [];
  let firstRowIsHeader = true;

  for (let c = range.s.c; c <= range.e.c; ++c) {
    const info = columnInfo?.[c] ?? {};
    if (info.hidden) {
      continue; // skip the column
    }

    const letter = utils.encode_col(c);
    const field: ColumnData = { index: c, name: letter, data: [], info };
    const bodyTypes = new Set<ExcelDataType>();

    for (let row = range.s.r; row <= range.e.r; ++row) {
      const cell = sheet[letter + utils.encode_row(row)];
      if (cell) {
        if (row > range.s.r) {
          bodyTypes.add(cell.t);
        } else if (cell.t !== 's') {
          // Any non-string cell in the first row means the row is data, not a header.
          firstRowIsHeader = false;
        }
      }
      // Missing cells are pushed too, so every column keeps one entry per row.
      field.data.push(cell);
    }

    scanned.push({ field, bodyTypes });
  }

  // Header status is only known after all columns are scanned, so names and
  // types are finalized in a second pass.
  return scanned.map(({ field, bodyTypes }) => {
    const first = field.data[0];
    if (firstRowIsHeader) {
      if (first?.v) {
        field.name = utils.format_cell(first);
      }
      field.data = field.data.slice(1);
    } else if (first) {
      // The first row is data: its type must agree with the rest of the column.
      bodyTypes.add(first.t);
    }

    if (bodyTypes.size === 1) {
      field.type = Array.from(bodyTypes)[0];
    }
    return field;
  });
}
/**
 * Decodes an A1-style range string such as "A1:C3" into zero-based start (`s`)
 * and end (`e`) column/row indexes. A string without a ':' separator yields a
 * range whose end equals its start. A part that is absent decodes to -1.
 *
 * Copied from Apache 2 licensed sheetjs:
 * https://git.sheetjs.com/sheetjs/sheetjs/src/branch/master/xlsx.flow.js#L4338
 */
function safe_decode_range(range: string): Range {
  const len = range.length;
  let pos = 0;

  // Consumes characters from `pos` while (charCode - zero) lies in [lo, hi],
  // accumulating them as digits in the given base; returns the value minus one.
  const scan = (base: number, zero: number, lo: number, hi: number): number => {
    let acc = 0;
    while (pos < len) {
      const digit = range.charCodeAt(pos) - zero;
      if (digit < lo || digit > hi) {
        break;
      }
      acc = base * acc + digit;
      pos++;
    }
    return acc - 1;
  };
  const scanColumn = () => scan(26, 64, 1, 26); // 'A'..'Z'
  const scanRow = () => scan(10, 48, 0, 9); // '0'..'9'

  const startCol = scanColumn();
  const startRow = scanRow();
  const decoded = { s: { c: startCol, r: startRow }, e: { c: startCol, r: startRow } };

  // Only continue when the next character is the ':' separator (char code 58).
  if (pos === len || range.charCodeAt(pos) !== 58) {
    return decoded;
  }
  pos++;

  decoded.e.c = scanColumn();
  decoded.e.r = scanRow();
  return decoded;
}