Transformations: Add support for an inner join transformation (#53865)

This commit is contained in:
Alex Karacaoglu 2022-08-18 13:22:45 -04:00 committed by GitHub
parent fb40b80141
commit a3c1cd836e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 1095 additions and 486 deletions

View File

@ -568,3 +568,32 @@ Here is the result after adding a Limit transformation with a value of '3':
| 2020-07-07 11:34:20 | Temperature | 25 |
| 2020-07-07 11:34:20 | Humidity | 22 |
| 2020-07-07 10:32:20 | Humidity | 29 |
## Join by field (Inner join)
Use this transformation to combine the results from multiple queries into a single result, joining on a specified field or, if none is specified, on the first time column. Rows for which a join cannot be made are dropped, so the transformation performs an inner join.
In the example below, two queries return table data. Before the inner join transformation is applied, the data is visualized as two separate tables.
Query A:
| Time | Job | Uptime |
| ------------------- | ------- | --------- |
| 2020-07-07 11:34:20 | node | 25260122 |
| 2020-07-07 11:24:20 | postgre | 123001233 |
| 2020-07-07 11:14:20 | postgre | 345001233 |
Query B:
| Time | Server | Errors |
| ------------------- | -------- | ------ |
| 2020-07-07 11:34:20 | server 1 | 15 |
| 2020-07-07 11:24:20 | server 2 | 5 |
| 2020-07-07 11:04:20 | server 3 | 10 |
Result after applying the inner join transformation:
| Time | Job | Uptime | Server | Errors |
| ------------------- | ------- | --------- | -------- | ------ |
| 2020-07-07 11:34:20 | node | 25260122 | server 1 | 15 |
| 2020-07-07 11:24:20 | postgre | 123001233 | server 2 | 5 |

View File

@ -39,6 +39,7 @@
"d3-interpolate": "1.4.0",
"date-fns": "2.29.1",
"eventemitter3": "4.0.7",
"fast_array_intersect": "1.1.0",
"history": "4.10.1",
"lodash": "4.17.21",
"marked": "4.0.18",

View File

@ -15,6 +15,7 @@ export {
ByNamesMatcherMode,
} from './matchers/nameMatcher';
export type { RenameByRegexTransformerOptions } from './transformers/renameByRegex';
export { outerJoinDataFrames } from './transformers/joinDataFrames';
/** @deprecated -- will be removed in future versions */
export { joinDataFrames as outerJoinDataFrames } from './transformers/joinDataFrames';
export * from './transformers/histogram';
export { ensureTimeField } from './transformers/convertFieldType';

View File

@ -4,21 +4,21 @@ import { mockTransformationsRegistry } from '../../utils/tests/mockTransformatio
import { ArrayVector } from '../../vector';
import { calculateFieldTransformer } from './calculateField';
import { isLikelyAscendingVector, outerJoinDataFrames } from './joinDataFrames';
import { isLikelyAscendingVector, joinDataFrames } from './joinDataFrames';
import { JoinMode } from './seriesToColumns';
describe('align frames', () => {
beforeAll(() => {
mockTransformationsRegistry([calculateFieldTransformer]);
});
it('by first time field', () => {
describe('by first time field', () => {
const series1 = toDataFrame({
fields: [
{ name: 'TheTime', type: FieldType.time, values: [1000, 2000] },
{ name: 'A', type: FieldType.number, values: [1, 100] },
],
});
const series2 = toDataFrame({
fields: [
{ name: '_time', type: FieldType.time, values: [1000, 1500, 2000] },
@ -28,56 +28,106 @@ describe('align frames', () => {
],
});
const out = outerJoinDataFrames({ frames: [series1, series2] })!;
expect(
out.fields.map((f) => ({
name: f.name,
values: f.values.toArray(),
}))
).toMatchInlineSnapshot(`
Array [
Object {
"name": "TheTime",
"values": Array [
1000,
1500,
2000,
],
},
Object {
"name": "A",
"values": Array [
1,
undefined,
100,
],
},
Object {
"name": "A",
"values": Array [
2,
20,
200,
],
},
Object {
"name": "B",
"values": Array [
3,
30,
300,
],
},
Object {
"name": "C",
"values": Array [
"first",
"second",
"third",
],
},
]
`);
it('should perform an outer join', () => {
const out = joinDataFrames({ frames: [series1, series2] })!;
expect(
out.fields.map((f) => ({
name: f.name,
values: f.values.toArray(),
}))
).toMatchInlineSnapshot(`
Array [
Object {
"name": "TheTime",
"values": Array [
1000,
1500,
2000,
],
},
Object {
"name": "A",
"values": Array [
1,
undefined,
100,
],
},
Object {
"name": "A",
"values": Array [
2,
20,
200,
],
},
Object {
"name": "B",
"values": Array [
3,
30,
300,
],
},
Object {
"name": "C",
"values": Array [
"first",
"second",
"third",
],
},
]
`);
});
it('should perform an inner join', () => {
const out = joinDataFrames({ frames: [series1, series2], mode: JoinMode.inner })!;
expect(
out.fields.map((f) => ({
name: f.name,
values: f.values.toArray(),
}))
).toMatchInlineSnapshot(`
Array [
Object {
"name": "TheTime",
"values": Array [
1000,
2000,
],
},
Object {
"name": "A",
"values": Array [
1,
100,
],
},
Object {
"name": "A",
"values": Array [
2,
200,
],
},
Object {
"name": "B",
"values": Array [
3,
300,
],
},
Object {
"name": "C",
"values": Array [
"first",
"third",
],
},
]
`);
});
});
it('unsorted input keep indexes', () => {
@ -96,7 +146,7 @@ describe('align frames', () => {
],
});
let out = outerJoinDataFrames({ frames: [series1, series3], keepOriginIndices: true })!;
let out = joinDataFrames({ frames: [series1, series3], keepOriginIndices: true })!;
expect(
out.fields.map((f) => ({
name: f.name,
@ -151,7 +201,7 @@ describe('align frames', () => {
`);
// Fast path still adds origin indices
out = outerJoinDataFrames({ frames: [series1], keepOriginIndices: true })!;
out = joinDataFrames({ frames: [series1], keepOriginIndices: true })!;
expect(
out.fields.map((f) => ({
name: f.name,
@ -189,7 +239,7 @@ describe('align frames', () => {
],
});
const out = outerJoinDataFrames({ frames: [series1], keepOriginIndices: true })!;
const out = joinDataFrames({ frames: [series1], keepOriginIndices: true })!;
expect(
out.fields.map((f) => ({
name: f.name,
@ -236,7 +286,7 @@ describe('align frames', () => {
],
});
const out = outerJoinDataFrames({ frames: [series1, series3] })!;
const out = joinDataFrames({ frames: [series1, series3] })!;
expect(
out.fields.map((f) => ({
name: f.name,

View File

@ -1,9 +1,13 @@
import intersect from 'fast_array_intersect';
import { getTimeField, sortDataFrame } from '../../dataframe';
import { DataFrame, Field, FieldMatcher, FieldType, Vector } from '../../types';
import { ArrayVector } from '../../vector';
import { fieldMatchers } from '../matchers';
import { FieldMatcherID } from '../matchers/ids';
import { JoinMode } from './seriesToColumns';
export function pickBestJoinField(data: DataFrame[]): FieldMatcher {
const { timeField } = getTimeField(data[0]);
if (timeField) {
@ -52,6 +56,11 @@ export interface JoinOptions {
* @internal -- used when we need to keep a reference to the original frame/field index
*/
keepOriginIndices?: boolean;
/**
* @internal -- Optionally specify a join mode (outer or inner); an outer join is performed when omitted
*/
mode?: JoinMode;
}
function getJoinMatcher(options: JoinOptions): FieldMatcher {
@ -77,7 +86,7 @@ export function maybeSortFrame(frame: DataFrame, fieldIdx: number) {
* This will return a single frame joined by the first matching field. When a join field is not specified,
* the default will use the first time field
*/
export function outerJoinDataFrames(options: JoinOptions): DataFrame | undefined {
export function joinDataFrames(options: JoinOptions): DataFrame | undefined {
if (!options.frames?.length) {
return;
}
@ -211,7 +220,7 @@ export function outerJoinDataFrames(options: JoinOptions): DataFrame | undefined
allData.push(a);
}
const joined = join(allData, nullModes);
const joined = join(allData, nullModes, options.mode);
return {
// ...options.data[0], // keep name, meta?
@ -272,16 +281,23 @@ function nullExpand(yVals: Array<number | null>, nullIdxs: number[], alignedLen:
}
// nullModes is a tables-matched array indicating how to treat nulls in each series
export function join(tables: AlignedData[], nullModes?: number[][]) {
const xVals = new Set<number>();
export function join(tables: AlignedData[], nullModes?: number[][], mode: JoinMode = JoinMode.outer) {
let xVals: Set<number>;
for (let ti = 0; ti < tables.length; ti++) {
let t = tables[ti];
let xs = t[0];
let len = xs.length;
if (mode === JoinMode.inner) {
// @ts-ignore
xVals = new Set(intersect(tables.map((t) => t[0])));
} else {
xVals = new Set();
for (let i = 0; i < len; i++) {
xVals.add(xs[i]);
for (let ti = 0; ti < tables.length; ti++) {
let t = tables[ti];
let xs = t[0];
let len = xs.length;
for (let i = 0; i < len; i++) {
xVals.add(xs[i]);
}
}
}

View File

@ -5,10 +5,16 @@ import { fieldMatchers } from '../matchers';
import { FieldMatcherID } from '../matchers/ids';
import { DataTransformerID } from './ids';
import { outerJoinDataFrames } from './joinDataFrames';
import { joinDataFrames } from './joinDataFrames';
/**
 * Join strategies accepted by `joinDataFrames` and the series-to-columns transformer.
 */
export enum JoinMode {
// Keep every join-field value that appears in any of the input frames.
outer = 'outer',
// Keep only the join-field values that appear in every input frame; other rows are dropped.
inner = 'inner',
}
/**
 * Options for `seriesToColumnsTransformer`.
 */
export interface SeriesToColumnsOptions {
byField?: string; // empty will pick the field automatically
// Join mode forwarded to `joinDataFrames`; the transformer's defaultOptions set this to JoinMode.outer.
mode?: JoinMode;
}
export const seriesToColumnsTransformer: SynchronousDataTransformerInfo<SeriesToColumnsOptions> = {
@ -17,6 +23,7 @@ export const seriesToColumnsTransformer: SynchronousDataTransformerInfo<SeriesTo
description: 'Groups series by field and returns values as columns',
defaultOptions: {
byField: undefined, // DEFAULT_KEY_FIELD,
mode: JoinMode.outer,
},
operator: (options) => (source) => source.pipe(map((data) => seriesToColumnsTransformer.transformer(options)(data))),
@ -28,7 +35,7 @@ export const seriesToColumnsTransformer: SynchronousDataTransformerInfo<SeriesTo
if (options.byField && !joinBy) {
joinBy = fieldMatchers.get(FieldMatcherID.byName).get(options.byField);
}
const joined = outerJoinDataFrames({ frames: data, joinBy });
const joined = joinDataFrames({ frames: data, joinBy, mode: options.mode });
if (joined) {
return [joined];
}

View File

@ -4834,6 +4834,7 @@ __metadata:
date-fns: 2.29.1
esbuild: ^0.14.47
eventemitter3: 4.0.7
fast_array_intersect: 1.1.0
history: 4.10.1
lodash: 4.17.21
marked: 4.0.18
@ -20344,6 +20345,13 @@ __metadata:
languageName: node
linkType: hard
"fast_array_intersect@npm:1.1.0":
version: 1.1.0
resolution: "fast_array_intersect@npm:1.1.0"
checksum: 3bd65089e84f3eb2b378d346b741fe333183fdf3c14166ffeafd228e40b035703d82a3a9e645c8714c2f2b399140f6f0991a1c0344b0a8cf05697a48365cbadc
languageName: node
linkType: hard
"fastest-levenshtein@npm:^1.0.12":
version: 1.0.12
resolution: "fastest-levenshtein@npm:1.0.12"