chore: use grafana/experimental for dashboard AI feature (#76948)

The 'llms' module of @grafana/experimental was vendored into the
GenAI components at some point in the past to speed up iteration, so
that dashboard feature development didn't have to wait for a new
release of the experimental package for every change. Now that we
think this has stabilized, we can remove the vendored module from
the Grafana codebase and use the version from @grafana/experimental
instead.

This requires a couple of minor changes, mostly around health check
results, which have a slightly different shape in @grafana/experimental
now, but otherwise the changes are mechanical.
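
For illustration, the mechanical part of the change is a namespace swap at
each call site, along these lines (a sketch, not part of the diff):

// Before: import from the vendored copy.
import { openai } from './llms';
type Message = openai.Message;

// After: import from the published package.
import { llms } from '@grafana/experimental';
type Message = llms.openai.Message;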

Co-authored-by: nmarrs <nathanielmarrs@gmail.com>
Ben Sully 2023-10-25 16:38:55 +01:00 committed by GitHub
parent f9fc2e4568
commit eb62e02259
8 changed files with 22 additions and 443 deletions


@@ -3034,10 +3034,6 @@ exports[`better eslint`] = {
   "public/app/features/dashboard/components/DeleteDashboard/DeleteDashboardModal.tsx:5381": [
     [0, 0, 0, "Styles should be written using objects.", "0"]
   ],
-  "public/app/features/dashboard/components/GenAI/llms/openai.ts:5381": [
-    [0, 0, 0, "Do not use any type assertions.", "0"],
-    [0, 0, 0, "Do not use any type assertions.", "1"]
-  ],
   "public/app/features/dashboard/components/HelpWizard/HelpWizard.tsx:5381": [
     [0, 0, 0, "Styles should be written using objects.", "0"],
     [0, 0, 0, "Styles should be written using objects.", "1"],


@@ -2,15 +2,15 @@ import { useCallback, useEffect, useState } from 'react';
 import { useAsync } from 'react-use';
 import { Subscription } from 'rxjs';
+import { llms } from '@grafana/experimental';
 import { logError } from '@grafana/runtime';
 import { useAppNotification } from 'app/core/copy/appNotification';
-import { openai } from './llms';
 import { isLLMPluginEnabled, OPEN_AI_MODEL } from './utils';
 // Declared instead of imported from utils to make this hook modular
 // Ideally we will want to move the hook itself to a different scope later.
-type Message = openai.Message;
+type Message = llms.openai.Message;
 export enum StreamStatus {
   IDLE = 'idle',
@@ -76,7 +76,7 @@ export function useOpenAIStream(
     setStreamStatus(StreamStatus.GENERATING);
     setError(undefined);
     // Stream the completions. Each element is the next stream chunk.
-    const stream = openai
+    const stream = llms.openai
       .streamChatCompletions({
         model,
         temperature,
@@ -85,7 +85,7 @@ export function useOpenAIStream(
       .pipe(
         // Accumulate the stream content into a stream of strings, where each
         // element contains the accumulated message so far.
-        openai.accumulateContent()
+        llms.openai.accumulateContent()
         // The stream is just a regular Observable, so we can use standard rxjs
         // functionality to update state, e.g. recording when the stream
         // has completed.


@@ -1,19 +0,0 @@
import { SemVer } from 'semver';
import { logWarning } from '@grafana/runtime';
export const LLM_PLUGIN_ID = 'grafana-llm-app';
export const LLM_PLUGIN_ROUTE = `/api/plugins/${LLM_PLUGIN_ID}`;
// The LLM app was at version 0.2.0 before we added the health check.
// If the health check fails, or the details don't exist on the response,
// we should assume it's this older version.
export let LLM_PLUGIN_VERSION = new SemVer('0.2.0');
export function setLLMPluginVersion(version: string) {
try {
LLM_PLUGIN_VERSION = new SemVer(version);
} catch (e) {
logWarning('Failed to parse version of grafana-llm-app; assuming old version is present.');
}
}


@@ -1,2 +0,0 @@
// This is here to mimic the structure of imports from @grafana/experimental while switching over
export * as openai from './openai';


@@ -1,386 +0,0 @@
/**
* OpenAI API client.
*
* This module contains functions used to make requests to the OpenAI API via
* the Grafana LLM app plugin. That plugin must be installed, enabled and configured
* in order for these functions to work.
*
* The {@link enabled} function can be used to check if the plugin is enabled and configured.
*/
import { pipe, Observable, UnaryFunction } from 'rxjs';
import { filter, map, scan, takeWhile, tap } from 'rxjs/operators';
import {
isLiveChannelMessageEvent,
LiveChannelAddress,
LiveChannelMessageEvent,
LiveChannelScope,
} from '@grafana/data';
import { getBackendSrv, getGrafanaLiveSrv, logDebug } from '@grafana/runtime';
import { LLM_PLUGIN_ID, LLM_PLUGIN_ROUTE, setLLMPluginVersion } from './constants';
import { LLMAppSettings } from './types';
const OPENAI_CHAT_COMPLETIONS_PATH = 'openai/v1/chat/completions';
/** The role of a message's author. */
export type Role = 'system' | 'user' | 'assistant' | 'function';
/** A message in a conversation. */
export interface Message {
/** The role of the message's author. */
role: Role;
/** The contents of the message. content is required for all messages, and may be null for assistant messages with function calls. */
content: string;
/**
* The name of the author of this message.
*
* This is required if role is 'function', and it should be the name of the function whose response is in the content.
*
* May contain a-z, A-Z, 0-9, and underscores, with a maximum length of 64 characters.
*/
name?: string;
/**
* The name and arguments of a function that should be called, as generated by the model.
*/
function_call?: Object;
}
/** A function the model may generate JSON inputs for. */
export interface Function {
/**
* The name of the function to be called.
*
* Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
*/
name: string;
/**
* A description of what the function does, used by the model to choose when and how to call the function.
*/
description?: string;
/*
* The parameters the functions accepts, described as a JSON Schema object. See the OpenAI guide for examples, and the JSON Schema reference for documentation about the format.
*
* To describe a function that accepts no parameters, provide the value {"type": "object", "properties": {}}.
*/
parameters: Object;
}
export interface ChatCompletionsRequest {
/**
* ID of the model to use.
*
* See the model endpoint compatibility table for details on which models work with the Chat Completions API.
*/
model: string;
/** A list of messages comprising the conversation so far. */
messages: Message[];
/** A list of functions the model may generate JSON inputs for. */
functions?: Function[];
/**
* Controls how the model responds to function calls.
*
* "none" means the model does not call a function, and responds to the end-user.
* "auto" means the model can pick between an end-user or calling a function.
* Specifying a particular function via {"name": "my_function"} forces the model to call that function.
*
* "none" is the default when no functions are present. "auto" is the default if functions are present.
*/
function_call?: 'none' | 'auto' | { name: string };
/**
* What sampling temperature to use, between 0 and 2.
* Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
*
* We generally recommend altering this or top_p but not both.
*/
temperature?: number;
/**
* An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass.
* So 0.1 means only the tokens comprising the top 10% probability mass are considered.
*
* We generally recommend altering this or temperature but not both.
*/
top_p?: number;
/**
* How many chat completion choices to generate for each input message.
*/
n?: number;
/**
* Up to 4 sequences where the API will stop generating further tokens.
*/
stop?: string | string[];
/**
* The maximum number of tokens to generate in the chat completion.
*
* The total length of input tokens and generated tokens is limited by the model's context length. Example Python code for counting tokens.
*/
max_tokens?: number;
/**
* Number between -2.0 and 2.0.
*
* Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
*/
presence_penalty?: number;
/**
* Number between -2.0 and 2.0.
*
* Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
*/
frequency_penalty?: number;
/**
* Modify the likelihood of specified tokens appearing in the completion.
*
* Accepts a json object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100.
* Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model,
* but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban
* or exclusive selection of the relevant token.
*/
logit_bias?: { [key: string]: number };
/**
* A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
*/
user?: string;
}
/** A completion object from an OpenAI model. */
export interface Choice {
/** The message object generated by the model. */
message: Message;
/**
* The reason the model stopped generating text.
*
* This may be one of:
* - stop: API returned complete message, or a message terminated by one of the stop sequences provided via the stop parameter
* - length: incomplete model output due to max_tokens parameter or token limit
* - function_call: the model decided to call a function
* - content_filter: omitted content due to a flag from our content filters
* - null: API response still in progress or incomplete
*/
finish_reason: string;
/** The index of the completion in the list of choices. */
index: number;
}
/** The usage statistics for a request to OpenAI. */
export interface Usage {
/** The number of tokens in the prompt. */
prompt_tokens: number;
/** The number of tokens in the completion. */
completion_tokens: number;
/** The total number of tokens. */
total_tokens: number;
}
/** The error response from the Grafana LLM app when trying to call the chat completions API. */
interface ChatCompletionsErrorResponse {
/** The error message. */
error: string;
}
/** A response from the OpenAI Chat Completions API. */
export interface ChatCompletionsResponse<T = Choice> {
/** The ID of the request. */
id: string;
/** The type of object returned (e.g. 'chat.completion'). */
object: string;
/** The timestamp of the request, as a UNIX timestamp. */
created: number;
/** The name of the model used to generate the response. */
model: string;
/** A list of completion objects (only one, unless `n > 1` in the request). */
choices: T[];
/** The number of tokens used to generate the replies, counting prompt, completion, and total. */
usage: Usage;
}
/** A content message returned from the model. */
export interface ContentMessage {
/** The content of the message. */
content: string;
}
/** A message returned from the model indicating that it is done. */
export interface DoneMessage {
done: boolean;
}
/** A function call message returned from the model. */
export interface FunctionCallMessage {
/** The name of the function to call. */
name: string;
/** JSON string for the arguments to the function call. */
arguments: string;
}
/**
* A delta returned from a stream of chat completion responses.
*
* In practice this will be either a content message or a function call;
* done messages are filtered out by the `streamChatCompletions` function.
*/
export type ChatCompletionsDelta = ContentMessage | FunctionCallMessage | DoneMessage;
/** A chunk included in a chat completion response. */
export interface ChatCompletionsChunk {
/** The delta since the previous chunk. */
delta: ChatCompletionsDelta;
}
/** Return true if the message is a 'content' message. */
export function isContentMessage(message: unknown): message is ContentMessage {
return typeof message === 'object' && message !== null && 'content' in message;
}
/** Return true if the message is a 'done' message. */
export function isDoneMessage(message: unknown): message is DoneMessage {
return typeof message === 'object' && message !== null && 'done' in message;
}
/** Return true if the response is an error response. */
export function isErrorResponse(response: unknown): response is ChatCompletionsErrorResponse {
return typeof response === 'object' && response !== null && 'error' in response;
}
/**
* An rxjs operator that extracts the content messages from a stream of chat completion responses.
*
* @returns An observable that emits the content messages. Each emission will be a string containing the
* token emitted by the model.
* @example <caption>Example of reading all tokens in a stream.</caption>
* const stream = streamChatCompletions({ model: 'gpt-3.5-turbo', messages: [
* { role: 'system', content: 'You are a great bot.' },
* { role: 'user', content: 'Hello, bot.' },
* ]}).pipe(extractContent());
* stream.subscribe({ next: console.log, error: console.error });
* // Output:
* // ['Hello', '! ', 'How ', 'are ', 'you', '?']
*/
export function extractContent(): UnaryFunction<
Observable<ChatCompletionsResponse<ChatCompletionsChunk>>,
Observable<string>
> {
return pipe(
filter((response: ChatCompletionsResponse<ChatCompletionsChunk>) => isContentMessage(response.choices[0].delta)),
// The type assertion is needed here because the type predicate above doesn't seem to propagate.
map(
(response: ChatCompletionsResponse<ChatCompletionsChunk>) => (response.choices[0].delta as ContentMessage).content
)
);
}
/**
* An rxjs operator that accumulates the content messages from a stream of chat completion responses.
*
* @returns An observable that emits the accumulated content messages. Each emission will be a string containing the
* content of all messages received so far.
* @example
* const stream = streamChatCompletions({ model: 'gpt-3.5-turbo', messages: [
* { role: 'system', content: 'You are a great bot.' },
* { role: 'user', content: 'Hello, bot.' },
* ]}).pipe(accumulateContent());
* stream.subscribe({ next: console.log, error: console.error });
* // Output:
* // ['Hello', 'Hello! ', 'Hello! How ', 'Hello! How are ', 'Hello! How are you', 'Hello! How are you?']
*/
export function accumulateContent(): UnaryFunction<
Observable<ChatCompletionsResponse<ChatCompletionsChunk>>,
Observable<string>
> {
return pipe(
extractContent(),
scan((acc, curr) => acc + curr, '')
);
}
/**
* Make a request to OpenAI's chat-completions API via the Grafana LLM plugin proxy.
*/
export async function chatCompletions(request: ChatCompletionsRequest): Promise<ChatCompletionsResponse> {
const response = await getBackendSrv().post<ChatCompletionsResponse>(
'/api/plugins/grafana-llm-app/resources/openai/v1/chat/completions',
request,
{
headers: { 'Content-Type': 'application/json' },
}
);
return response;
}
/**
* Make a streaming request to OpenAI's chat-completions API via the Grafana LLM plugin proxy.
*
* A stream of tokens will be returned as an `Observable<string>`. Use the `extractContent` operator to
* filter the stream to only content messages, or the `accumulateContent` operator to obtain a stream of
* accumulated content messages.
*
* The 'done' message will not be emitted; the stream will simply end when this message is encountered.
*
* @example <caption>Example of reading all tokens in a stream.</caption>
* const stream = streamChatCompletions({ model: 'gpt-3.5-turbo', messages: [
* { role: 'system', content: 'You are a great bot.' },
* { role: 'user', content: 'Hello, bot.' },
* ]}).pipe(extractContent());
* stream.subscribe({ next: console.log, error: console.error });
* // Output:
* // ['Hello', '! ', 'How ', 'are ', 'you', '?']
*
* @example <caption>Example of accumulating tokens in a stream.</caption>
* const stream = streamChatCompletions({ model: 'gpt-3.5-turbo', messages: [
* { role: 'system', content: 'You are a great bot.' },
* { role: 'user', content: 'Hello, bot.' },
* ]}).pipe(accumulateContent());
* stream.subscribe({ next: console.log, error: console.error });
* // Output:
* // ['Hello', 'Hello! ', 'Hello! How ', 'Hello! How are ', 'Hello! How are you', 'Hello! How are you?']
*/
export function streamChatCompletions(
request: ChatCompletionsRequest
): Observable<ChatCompletionsResponse<ChatCompletionsChunk>> {
const channel: LiveChannelAddress = {
scope: LiveChannelScope.Plugin,
namespace: LLM_PLUGIN_ID,
path: OPENAI_CHAT_COMPLETIONS_PATH + '/' + self.crypto.randomUUID(),
data: request,
};
const messages = getGrafanaLiveSrv()
.getStream(channel)
.pipe(filter((event) => isLiveChannelMessageEvent(event))) as Observable<
LiveChannelMessageEvent<ChatCompletionsResponse<ChatCompletionsChunk>>
>;
return messages.pipe(
tap((event) => {
if (isErrorResponse(event.message)) {
throw new Error(event.message.error);
}
}),
takeWhile((event) => isErrorResponse(event.message) || !isDoneMessage(event.message.choices[0].delta)),
map((event) => event.message)
);
}
let loggedWarning = false;
/** Check if the OpenAI API is enabled via the LLM plugin. */
export const enabled = async () => {
try {
const settings: LLMAppSettings = await getBackendSrv().get(`${LLM_PLUGIN_ROUTE}/settings`, undefined, undefined, {
showSuccessAlert: false,
showErrorAlert: false,
});
setLLMPluginVersion(settings.info.version);
return settings.enabled ?? false;
} catch (e) {
if (!loggedWarning) {
logDebug(String(e));
logDebug(
'Failed to check if OpenAI is enabled. This is expected if the Grafana LLM plugin is not installed, and the above error can be ignored.'
);
loggedWarning = true;
}
return false;
}
};
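
With the vendored module removed, the same streaming helpers are consumed
from @grafana/experimental; apart from the extra 'llms' namespace, usage is
unchanged (a sketch mirroring the doc examples above):

import { llms } from '@grafana/experimental';

const messages: llms.openai.Message[] = [
  { role: 'system', content: 'You are a great bot.' },
  { role: 'user', content: 'Hello, bot.' },
];
const stream = llms.openai
  .streamChatCompletions({ model: 'gpt-3.5-turbo', messages })
  .pipe(llms.openai.accumulateContent());
stream.subscribe({ next: console.log, error: console.error });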


@@ -1,14 +0,0 @@
export type LLMAppHealthCheck = {
details: {
openAI?: boolean;
vector?: boolean;
version?: string;
};
};
export type LLMAppSettings = {
enabled: boolean;
info: {
version: string;
};
};
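
These types now come from @grafana/experimental rather than a local module.
Judging by the updated tests below, the health details are surfaced through
the resolved value of llms.openai.enabled() instead of a plain boolean (a
sketch; the exact field set is inferred from the mocks):

import { llms } from '@grafana/experimental';

// enabled() now resolves to a health-check object rather than a boolean.
async function isOpenAIEnabled(): Promise<boolean> {
  const health = await llms.openai.enabled(); // e.g. { ok: true, configured: false }
  return health.ok;
}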


@@ -1,14 +1,17 @@
+import { llms } from '@grafana/experimental';
 import { createDashboardModelFixture, createPanelSaveModel } from '../../state/__fixtures__/dashboardFixtures';
-import { openai } from './llms';
 import { getDashboardChanges, isLLMPluginEnabled, sanitizeReply } from './utils';
 // Mock the llms.openai module
-jest.mock('./llms', () => ({
-  openai: {
-    streamChatCompletions: jest.fn(),
-    accumulateContent: jest.fn(),
-    enabled: jest.fn(),
+jest.mock('@grafana/experimental', () => ({
+  llms: {
+    openai: {
+      streamChatCompletions: jest.fn(),
+      accumulateContent: jest.fn(),
+      enabled: jest.fn(),
+    },
   },
 }));
@@ -87,7 +90,7 @@ describe('getDashboardChanges', () => {
 describe('isLLMPluginEnabled', () => {
   it('should return true if LLM plugin is enabled', async () => {
     // Mock llms.openai.enabled to return true
-    jest.mocked(openai.enabled).mockResolvedValue(true);
+    jest.mocked(llms.openai.enabled).mockResolvedValue({ ok: true, configured: false });
     const enabled = await isLLMPluginEnabled();
@@ -96,7 +99,7 @@ describe('isLLMPluginEnabled', () => {
   it('should return false if LLM plugin is not enabled', async () => {
     // Mock llms.openai.enabled to return false
-    jest.mocked(openai.enabled).mockResolvedValue(false);
+    jest.mocked(llms.openai.enabled).mockResolvedValue({ ok: false, configured: false });
     const enabled = await isLLMPluginEnabled();


@@ -1,7 +1,8 @@
+import { llms } from '@grafana/experimental';
 import { DashboardModel, PanelModel } from '../../state';
 import { getDashboardStringDiff } from './jsonDiffText';
-import { openai } from './llms';
 export enum Role {
   // System content cannot be overwritten by user prompts.
@@ -11,7 +12,7 @@ export enum Role {
   'user' = 'user',
 }
-export type Message = openai.Message;
+export type Message = llms.openai.Message;
 export enum QuickFeedbackType {
   Shorter = 'Even shorter',
@@ -53,13 +54,13 @@ export function getDashboardChanges(dashboard: DashboardModel): {
 }
 /**
- * Check if the LLM plugin is enabled and configured.
- * @returns true if the LLM plugin is enabled and configured.
+ * Check if the LLM plugin is enabled.
+ * @returns true if the LLM plugin is enabled.
  */
 export async function isLLMPluginEnabled() {
-  // Check if the LLM plugin is enabled and configured.
+  // Check if the LLM plugin is enabled.
   // If not, we won't be able to make requests, so return early.
-  return await openai.enabled();
+  return llms.openai.enabled().then((response) => response.ok);
 }
 /**