Geomap: Fix field lookup to match against the entire provided gazetteer (#71805)

Co-authored-by: Drew Slobodnjak <60050885+drew08t@users.noreply.github.com>
This commit is contained in:
Nathan Marrs
2023-07-19 00:09:47 +02:00
committed by GitHub
parent ef0de1af32
commit ad272b7984
2 changed files with 148 additions and 19 deletions

View File

@@ -116,4 +116,131 @@ describe('Lookup gazetteer', () => {
]
`);
});
// Builds a 7-row input frame and a 7-entry in-memory gazetteer, runs addFieldsFromGazetteer on the
// `location` field, and snapshots the resulting fields. The snapshot shows the gazetteer's
// id/name/lng/lat fields inserted right after `location`, filled for 'AL', 'AK', 'CO' and 'CA' and
// left as holes for 'Arizona', 'Arkansas' and 'Somewhere'.
// NOTE(review): the input and the gazetteer both have exactly 7 rows, so this test cannot tell a loop
// bounded by the input length apart from one bounded by the gazetteer size — consider unequal sizes.
it('goes through entire gazetteer to find matches', async () => {
// Only `cfg.options.lookupField` is read below; the gazetteer path is not loaded in this test
// because the gazetteer is built from the in-memory `frame` further down.
const cfg = {
id: DataTransformerID.fieldLookup,
options: {
lookupField: 'location',
gazetteer: 'public/gazetteer/usa-states.json',
},
};
// Input data: `location` is the lookup field, `values` is an unrelated numeric field that the
// snapshot shows carried through after the appended gazetteer fields.
const data = toDataFrame({
name: 'locations',
fields: [
{
name: 'location',
type: FieldType.string,
values: ['AL', 'AK', 'Arizona', 'Arkansas', 'Somewhere', 'CO', 'CA'],
},
{ name: 'values', type: FieldType.number, values: [0, 10, 5, 1, 5, 1, 2] },
],
});
// By-name matcher that selects the `location` field.
const matcher = fieldMatchers.get(FieldMatcherID.byName).get(cfg.options?.lookupField);
// Gazetteer source frame. Input rows 'Arizona' and 'Arkansas' equal entries of the `name` field here,
// yet the snapshot expects no match for them — presumably lookup is by `id` only; verify against
// frameAsGazetter.
const frame = toDataFrame({
fields: [
{ name: 'id', values: ['AL', 'AK', 'AZ', 'MO', 'CO', 'CA', 'GA'] },
{ name: 'name', values: ['Alabama', 'Arkansas', 'Arizona', 'Missouri', 'Colorado', 'California', 'Georgia'] },
{ name: 'lng', values: [-80.891064, -100.891064, -111.891064, -92.302, -105.3272, -119.7462, -83.6487] },
{ name: 'lat', values: [12.448457, 24.448457, 33.448457, 38.4623, 39.0646, 36.17, 32.9866] },
],
});
const gaz = frameAsGazetter(frame, { path: 'path/to/gaz.json' });
// NOTE(review): addFieldsFromGazetteer appears to return DataFrame[] synchronously, so this `await`
// (applied to element [0]) looks unnecessary — confirm before removing.
const out = await addFieldsFromGazetteer([data], gaz, matcher)[0];
expect(out.fields).toMatchInlineSnapshot(`
[
{
"config": {},
"name": "location",
"type": "string",
"values": [
"AL",
"AK",
"Arizona",
"Arkansas",
"Somewhere",
"CO",
"CA",
],
},
{
"config": {},
"name": "id",
"type": "string",
"values": [
"AL",
"AK",
,
,
,
"CO",
"CA",
],
},
{
"config": {},
"name": "name",
"type": "string",
"values": [
"Alabama",
"Arkansas",
,
,
,
"Colorado",
"California",
],
},
{
"config": {},
"name": "lng",
"type": "number",
"values": [
-80.891064,
-100.891064,
,
,
,
-105.3272,
-119.7462,
],
},
{
"config": {},
"name": "lat",
"type": "number",
"values": [
12.448457,
24.448457,
,
,
,
39.0646,
36.17,
],
},
{
"config": {},
"name": "values",
"state": {
"displayName": "values",
"multipleFrames": false,
},
"type": "number",
"values": [
0,
10,
5,
1,
5,
1,
2,
],
},
]
`);
});
});

View File

@@ -28,49 +28,51 @@ export const fieldLookupTransformer: DataTransformerInfo<FieldLookupOptions> = {
/**
 * Resolves the by-name field matcher for `options.lookupField`, loads the gazetteer from
 * `options.gazetteer` (falling back to COUNTRIES_GAZETTEER_PATH), and passes both to
 * addFieldsFromGazetteer together with the input frames.
 *
 * Rejects with the string 'missing frame in gazetteer' when the loaded gazetteer has no `frame`.
 *
 * Note: in this view each line using the old local name `gaz` is immediately followed by its
 * replacement using `gazetteer`; the two are versions of the same statement.
 */
async function doGazetteerXform(frames: DataFrame[], options: FieldLookupOptions): Promise<DataFrame[]> {
const fieldMatches = fieldMatchers.get(FieldMatcherID.byName).get(options?.lookupField);
const gaz = await getGazetteer(options?.gazetteer ?? COUNTRIES_GAZETTEER_PATH);
const gazetteer = await getGazetteer(options?.gazetteer ?? COUNTRIES_GAZETTEER_PATH);
if (!gaz.frame) {
if (!gazetteer.frame) {
return Promise.reject('missing frame in gazetteer');
}
return addFieldsFromGazetteer(frames, gaz, fieldMatches);
return addFieldsFromGazetteer(frames, gazetteer, fieldMatches);
}
export function addFieldsFromGazetteer(frames: DataFrame[], gaz: Gazetteer, matcher: FieldMatcher): DataFrame[] {
const src = gaz.frame!()?.fields;
if (!src) {
export function addFieldsFromGazetteer(frames: DataFrame[], gazetteer: Gazetteer, matcher: FieldMatcher): DataFrame[] {
const gazetteerFields = gazetteer.frame!()?.fields;
if (!gazetteerFields) {
return frames;
}
return frames.map((frame) => {
const length = frame.length;
const frameLength = frame.length;
const fields: Field[] = [];
for (const field of frame.fields) {
fields.push(field);
//if the field matches
if (matcher(field, frame, frames)) {
const values = field.values;
const sub: any[][] = [];
for (const f of src) {
const buffer = new Array(length);
sub.push(buffer);
fields.push({ ...f, values: buffer });
const gazetteerFieldValuesBuffer: any[][] = [];
for (const gazetteerField of gazetteerFields) {
const buffer = new Array(frameLength);
gazetteerFieldValuesBuffer.push(buffer);
fields.push({ ...gazetteerField, values: buffer });
}
// Add all values to the buffer
for (let v = 0; v < sub.length; v++) {
const found = gaz.find(values[v]);
if (found?.index != null) {
for (let i = 0; i < src.length; i++) {
sub[i][v] = src[i].values[found.index];
for (let valueIndex = 0; valueIndex < gazetteer.count!; valueIndex++) {
const foundValue = gazetteer.find(values[valueIndex]);
if (foundValue?.index != null) {
for (let fieldIndex = 0; fieldIndex < gazetteerFields.length; fieldIndex++) {
gazetteerFieldValuesBuffer[fieldIndex][valueIndex] = gazetteerFields[fieldIndex].values[foundValue.index];
}
}
}
}
}
return {
...frame,
fields,