mirror of
https://github.com/grafana/grafana.git
synced 2024-12-25 08:21:46 -06:00
Profiling: Enhance configuration and docs (#90048)
Co-authored-by: Dave Henderson <dave.henderson@grafana.com>
This commit is contained in:
parent
8a6e92c0c9
commit
fc8a5cf468
@ -2,7 +2,7 @@
|
||||
init_cmds = [
|
||||
["GO_BUILD_DEV=1", "make", "build-go"],
|
||||
["make", "gen-jsonnet"],
|
||||
["./bin/grafana", "server", "-profile", "-profile-addr=127.0.0.1", "-profile-port=6000", "-packaging=dev", "cfg:app_mode=development"]
|
||||
["./bin/grafana", "server", "-profile", "-profile-addr=127.0.0.1", "-profile-port=6000", "-profile-block-rate=1", "-profile-mutex-rate=5", "-packaging=dev", "cfg:app_mode=development"]
|
||||
]
|
||||
watch_all = true
|
||||
follow_symlinks = true
|
||||
@ -18,5 +18,5 @@ build_delay = 1500
|
||||
cmds = [
|
||||
["GO_BUILD_DEV=1", "make", "build-go"],
|
||||
["make", "gen-jsonnet"],
|
||||
["./bin/grafana", "server", "-profile", "-profile-addr=127.0.0.1", "-profile-port=6000", "-packaging=dev", "cfg:app_mode=development"]
|
||||
["./bin/grafana", "server", "-profile", "-profile-addr=127.0.0.1", "-profile-port=6000", "-profile-block-rate=1", "-profile-mutex-rate=5", "-packaging=dev", "cfg:app_mode=development"]
|
||||
]
|
||||
|
@ -2,31 +2,48 @@
|
||||
aliases:
|
||||
- ../../troubleshooting/diagnostics/
|
||||
- ../enable-diagnostics/
|
||||
description: Learn how to configure tracing so that you can troubleshoot Grafana.
|
||||
description: Learn how to configure profiling and tracing so that you can troubleshoot Grafana.
|
||||
keywords:
|
||||
- grafana
|
||||
- troubleshooting
|
||||
- documentation
|
||||
- guide
|
||||
labels:
|
||||
products:
|
||||
- enterprise
|
||||
- oss
|
||||
menuTitle: Configure tracing
|
||||
title: Configure tracing to troubleshoot Grafana
|
||||
menuTitle: Configure profiling and tracing
|
||||
title: Configure profiling and tracing to troubleshoot Grafana
|
||||
weight: 200
|
||||
---
|
||||
|
||||
# Configure tracing to troubleshoot Grafana
|
||||
# Configure profiling and tracing to troubleshoot Grafana
|
||||
|
||||
You can set up the `grafana-server` process to enable certain diagnostics when it starts. This can be useful
|
||||
when investigating certain performance problems. It's _not_ recommended to have these enabled by default.
|
||||
|
||||
## Turn on profiling
|
||||
## Turn on profiling and collect profiles
|
||||
|
||||
The `grafana-server` can be started with the command-line option `-profile` to enable profiling, `-profile-addr` to override the default HTTP address (`localhost`), and
|
||||
`-profile-port` to override the default HTTP port (`6060`) where the `pprof` debugging endpoints are available. For example:
|
||||
`-profile-port` to override the default HTTP port (`6060`) where the `pprof` debugging endpoints are available. Further, [`-profile-block-rate`](https://pkg.go.dev/runtime#SetBlockProfileRate) controls the fraction of goroutine blocking events that are reported in the blocking profile, default `1` (i.e. track every event) for backward compatibility reasons, and [`-profile-mutex-rate`](https://pkg.go.dev/runtime#SetMutexProfileFraction) controls the fraction of mutex contention events that are reported in the mutex profile, default `0` (i.e. track no events). The higher the fraction (that is, the smaller this value) the more overhead it adds to normal operations.
|
||||
|
||||
Running Grafana with profiling enabled and without block and mutex profiling enabled should only add a small amount of overhead and is suitable for [continuous profiling](https://grafana.com/oss/pyroscope/). Adding a small fraction of block and mutex profiling, such as a rate between 10 and 5 (10%-20%), should in general be fine.
|
||||
|
||||
Enable profiling:
|
||||
|
||||
```bash
|
||||
./grafana server -profile -profile-addr=0.0.0.0 -profile-port=8080
|
||||
```
|
||||
|
||||
Note that `pprof` debugging endpoints are served on a different port than the Grafana HTTP server.
|
||||
Enable profiling with block and mutex profiling enabled with a fraction of 20%:
|
||||
|
||||
```bash
|
||||
./grafana server -profile -profile-addr=0.0.0.0 -profile-port=8080 -profile-block-rate=5 -profile-mutex-rate=5
|
||||
```
|
||||
|
||||
Note that `pprof` debugging endpoints are served on a different port than the Grafana HTTP server. Check what debugging endpoints are available by browsing `http://<profile-addr>:<profile-port>/debug/pprof`.
|
||||
|
||||
There are some additional [godeltaprof](https://github.com/grafana/pyroscope-go/tree/main/godeltaprof) endpoints available which are more suitable in a continuous profiling scenario. These endpoints are `/debug/pprof/delta_heap`, `/debug/pprof/delta_block`, `/debug/pprof/delta_mutex`.
|
||||
|
||||
You can configure or override profiling settings using environment variables:
|
||||
|
||||
@ -34,9 +51,41 @@ You can configure or override profiling settings using environment variables:
|
||||
export GF_DIAGNOSTICS_PROFILING_ENABLED=true
|
||||
export GF_DIAGNOSTICS_PROFILING_ADDR=0.0.0.0
|
||||
export GF_DIAGNOSTICS_PROFILING_PORT=8080
|
||||
export GF_DIAGNOSTICS_PROFILING_BLOCK_RATE=5
|
||||
export GF_DIAGNOSTICS_PROFILING_MUTEX_RATE=5
|
||||
```
|
||||
|
||||
Refer to [Go command pprof](https://golang.org/cmd/pprof/) for more information about how to collect and analyze profiling data.
|
||||
In general, you use the [Go command pprof](https://golang.org/cmd/pprof/) to both collect and analyze profiling data. You can also use [curl](https://curl.se/) or similar to collect profiles, which can be convenient in environments where you don't have the Go/pprof command available. The following examples show how to use curl and pprof to collect and analyze memory and CPU profiles.
|
||||
|
||||
**Analyzing high memory usage/memory leaks:**
|
||||
|
||||
When experiencing high memory usage or potential memory leaks, it's useful to collect several heap profiles and compare them later during analysis. It's a good idea to wait some time, e.g. 30 seconds, between collecting each profile to allow memory consumption to increase.
|
||||
|
||||
```bash
|
||||
curl http://<profile-addr>:<profile-port>/debug/pprof/heap > heap1.pprof
|
||||
sleep 30
|
||||
curl http://<profile-addr>:<profile-port>/debug/pprof/heap > heap2.pprof
|
||||
```
|
||||
|
||||
You can then use the pprof tool to compare two heap profiles:
|
||||
|
||||
```bash
|
||||
go tool pprof -http=localhost:8081 --base heap1.pprof heap2.pprof
|
||||
```
|
||||
|
||||
**Analyzing high CPU usage:**
|
||||
|
||||
When experiencing high CPU usage it's suggested to collect CPU profiles over a period of time, e.g. 30 seconds.
|
||||
|
||||
```bash
|
||||
curl 'http://<profile-addr>:<profile-port>/debug/pprof/profile?seconds=30' > profile.pprof
|
||||
```
|
||||
|
||||
You can then use the pprof tool to analyze the collected CPU profile:
|
||||
|
||||
```bash
|
||||
go tool pprof -http=localhost:8081 profile.pprof
|
||||
```
|
||||
|
||||
## Use tracing
|
||||
|
||||
|
@ -56,6 +56,10 @@ sudo yum install freetype*
|
||||
sudo yum install urw-fonts
|
||||
```
|
||||
|
||||
## Troubleshoot backend performance
|
||||
|
||||
If you're experiencing backend performance problems, such as high memory or CPU usage, please refer to [Configure profiling and tracing to troubleshoot Grafana]({{< relref "../setup-grafana/configure-grafana/configure-tracing/index.md" >}}).
|
||||
|
||||
## More help
|
||||
|
||||
Check out the [Grafana Community](https://community.grafana.com/) for more troubleshooting help (you must be logged in to post or comment).
|
||||
|
@ -893,6 +893,7 @@ golang.org/x/image v0.6.0/go.mod h1:MXLdDR43H7cDJq5GEGXEVeeNhPgi+YYEQ2pC1byI1x0=
|
||||
golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028 h1:4+4C/Iv2U4fMZBiMCc98MG1In4gJY5YRhtpDNeDeHWs=
|
||||
golang.org/x/telemetry v0.0.0-20240521205824-bda55230c457 h1:zf5N6UOrA487eEFacMePxjXAJctxKmyjKUsjA11Uzuk=
|
||||
golang.org/x/telemetry v0.0.0-20240521205824-bda55230c457/go.mod h1:pRgIJT+bRLFKnoM1ldnzKoxTIn14Yxz928LQRYYgIN0=
|
||||
gonum.org/v1/gonum v0.14.0 h1:2NiG67LD1tEH0D7kM+ps2V+fXmsAnpUeec7n8tcr4S0=
|
||||
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
|
||||
gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0 h1:OE9mWmgKkjJyEmDAAtGMPjXu+YNeGvK9VTSHY6+Qihc=
|
||||
gonum.org/v1/plot v0.10.1 h1:dnifSs43YJuNMDzB7v8wV64O4ABBHReuAVAoBxqBqS4=
|
||||
|
@ -77,7 +77,7 @@ func RunServer(opts ServerOptions) error {
|
||||
}
|
||||
}()
|
||||
|
||||
if err := setupProfiling(Profile, ProfileAddr, ProfilePort); err != nil {
|
||||
if err := setupProfiling(Profile, ProfileAddr, ProfilePort, ProfileBlockRate, ProfileMutexFraction); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setupTracing(Tracing, TracingFile, logger); err != nil {
|
||||
|
@ -12,24 +12,30 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
profilingEnabledEnvName = "GF_DIAGNOSTICS_PROFILING_ENABLED"
|
||||
profilingAddrEnvName = "GF_DIAGNOSTICS_PROFILING_ADDR"
|
||||
profilingPortEnvName = "GF_DIAGNOSTICS_PROFILING_PORT"
|
||||
tracingEnabledEnvName = "GF_DIAGNOSTICS_TRACING_ENABLED"
|
||||
tracingFileEnvName = "GF_DIAGNOSTICS_TRACING_FILE"
|
||||
profilingEnabledEnvName = "GF_DIAGNOSTICS_PROFILING_ENABLED"
|
||||
profilingAddrEnvName = "GF_DIAGNOSTICS_PROFILING_ADDR"
|
||||
profilingPortEnvName = "GF_DIAGNOSTICS_PROFILING_PORT"
|
||||
profilingBlockRateEnvName = "GF_DIAGNOSTICS_PROFILING_BLOCK_RATE"
|
||||
profilingMutexRateEnvName = "GF_DIAGNOSTICS_PROFILING_MUTEX_RATE"
|
||||
tracingEnabledEnvName = "GF_DIAGNOSTICS_TRACING_ENABLED"
|
||||
tracingFileEnvName = "GF_DIAGNOSTICS_TRACING_FILE"
|
||||
)
|
||||
|
||||
type profilingDiagnostics struct {
|
||||
enabled bool
|
||||
addr string
|
||||
port uint64
|
||||
enabled bool
|
||||
addr string
|
||||
port uint64
|
||||
blockRate int
|
||||
mutexRate int
|
||||
}
|
||||
|
||||
func newProfilingDiagnostics(enabled bool, addr string, port uint64) *profilingDiagnostics {
|
||||
func newProfilingDiagnostics(enabled bool, addr string, port uint64, blockRate int, mutexRate int) *profilingDiagnostics {
|
||||
return &profilingDiagnostics{
|
||||
enabled: enabled,
|
||||
addr: addr,
|
||||
port: port,
|
||||
enabled: enabled,
|
||||
addr: addr,
|
||||
port: port,
|
||||
blockRate: blockRate,
|
||||
mutexRate: mutexRate,
|
||||
}
|
||||
}
|
||||
|
||||
@ -57,6 +63,24 @@ func (pd *profilingDiagnostics) overrideWithEnv() error {
|
||||
pd.port = port
|
||||
}
|
||||
|
||||
blockRateEnv := os.Getenv(profilingBlockRateEnvName)
|
||||
if blockRateEnv != "" {
|
||||
blockRate, err := strconv.Atoi(blockRateEnv)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse %s environment variable as int", profilingBlockRateEnvName)
|
||||
}
|
||||
pd.blockRate = blockRate
|
||||
}
|
||||
|
||||
mutexFractionEnv := os.Getenv(profilingMutexRateEnvName)
|
||||
if mutexFractionEnv != "" {
|
||||
mutexProfileFraction, err := strconv.Atoi(mutexFractionEnv)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse %s environment variable as int", profilingMutexRateEnvName)
|
||||
}
|
||||
pd.mutexRate = mutexProfileFraction
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -90,15 +114,17 @@ func (td *tracingDiagnostics) overrideWithEnv() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func setupProfiling(profile bool, profileAddr string, profilePort uint64) error {
|
||||
profileDiagnostics := newProfilingDiagnostics(profile, profileAddr, profilePort)
|
||||
func setupProfiling(profile bool, profileAddr string, profilePort uint64, blockRate int, mutexFraction int) error {
|
||||
profileDiagnostics := newProfilingDiagnostics(profile, profileAddr, profilePort, blockRate, mutexFraction)
|
||||
if err := profileDiagnostics.overrideWithEnv(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if profileDiagnostics.enabled {
|
||||
fmt.Println("diagnostics: pprof profiling enabled", "addr", profileDiagnostics.addr, "port", profileDiagnostics.port)
|
||||
runtime.SetBlockProfileRate(1)
|
||||
fmt.Println("diagnostics: pprof profiling enabled", "addr", profileDiagnostics.addr, "port", profileDiagnostics.port, "blockProfileRate", profileDiagnostics.blockRate, "mutexProfileRate", profileDiagnostics.mutexRate)
|
||||
runtime.SetBlockProfileRate(profileDiagnostics.blockRate)
|
||||
runtime.SetMutexProfileFraction(profileDiagnostics.mutexRate)
|
||||
|
||||
go func() {
|
||||
// TODO: We should enable the linter and fix G114 here.
|
||||
// G114: Use of net/http serve function that has no support for setting timeouts (gosec)
|
||||
|
@ -9,17 +9,21 @@ import (
|
||||
|
||||
func TestProfilingDiagnostics(t *testing.T) {
|
||||
tcs := []struct {
|
||||
defaults *profilingDiagnostics
|
||||
enabledEnv string
|
||||
addrEnv string
|
||||
portEnv string
|
||||
expected *profilingDiagnostics
|
||||
defaults *profilingDiagnostics
|
||||
enabledEnv string
|
||||
addrEnv string
|
||||
portEnv string
|
||||
blockRateEnv string
|
||||
mutexRateEnv string
|
||||
expected *profilingDiagnostics
|
||||
}{
|
||||
{defaults: newProfilingDiagnostics(false, "localhost", 6060), enabledEnv: "", addrEnv: "", portEnv: "", expected: newProfilingDiagnostics(false, "localhost", 6060)},
|
||||
{defaults: newProfilingDiagnostics(true, "0.0.0.0", 8080), enabledEnv: "", addrEnv: "", portEnv: "", expected: newProfilingDiagnostics(true, "0.0.0.0", 8080)},
|
||||
{defaults: newProfilingDiagnostics(false, "", 6060), enabledEnv: "false", addrEnv: "", portEnv: "8080", expected: newProfilingDiagnostics(false, "", 8080)},
|
||||
{defaults: newProfilingDiagnostics(false, "localhost", 6060), enabledEnv: "true", addrEnv: "0.0.0.0", portEnv: "8080", expected: newProfilingDiagnostics(true, "0.0.0.0", 8080)},
|
||||
{defaults: newProfilingDiagnostics(false, "127.0.0.1", 6060), enabledEnv: "true", addrEnv: "", portEnv: "", expected: newProfilingDiagnostics(true, "127.0.0.1", 6060)},
|
||||
{defaults: newProfilingDiagnostics(false, "localhost", 6060, 0, 0), enabledEnv: "", addrEnv: "", portEnv: "", expected: newProfilingDiagnostics(false, "localhost", 6060, 0, 0)},
|
||||
{defaults: newProfilingDiagnostics(true, "0.0.0.0", 8080, 0, 0), enabledEnv: "", addrEnv: "", portEnv: "", expected: newProfilingDiagnostics(true, "0.0.0.0", 8080, 0, 0)},
|
||||
{defaults: newProfilingDiagnostics(false, "", 6060, 0, 0), enabledEnv: "false", addrEnv: "", portEnv: "8080", expected: newProfilingDiagnostics(false, "", 8080, 0, 0)},
|
||||
{defaults: newProfilingDiagnostics(false, "localhost", 6060, 0, 0), enabledEnv: "true", addrEnv: "0.0.0.0", portEnv: "8080", expected: newProfilingDiagnostics(true, "0.0.0.0", 8080, 0, 0)},
|
||||
{defaults: newProfilingDiagnostics(false, "127.0.0.1", 6060, 0, 0), enabledEnv: "true", addrEnv: "", portEnv: "", expected: newProfilingDiagnostics(true, "127.0.0.1", 6060, 0, 0)},
|
||||
{defaults: newProfilingDiagnostics(true, "localhost", 6060, 0, 0), enabledEnv: "", addrEnv: "", portEnv: "", blockRateEnv: "3", mutexRateEnv: "4", expected: newProfilingDiagnostics(true, "localhost", 6060, 3, 4)},
|
||||
{defaults: newProfilingDiagnostics(true, "localhost", 6060, 0, 0), enabledEnv: "", addrEnv: "", portEnv: "", expected: newProfilingDiagnostics(true, "localhost", 6060, 0, 0)},
|
||||
}
|
||||
|
||||
for i, tc := range tcs {
|
||||
@ -33,6 +37,12 @@ func TestProfilingDiagnostics(t *testing.T) {
|
||||
if tc.portEnv != "" {
|
||||
t.Setenv(profilingPortEnvName, tc.portEnv)
|
||||
}
|
||||
if tc.blockRateEnv != "" {
|
||||
t.Setenv(profilingBlockRateEnvName, tc.blockRateEnv)
|
||||
}
|
||||
if tc.mutexRateEnv != "" {
|
||||
t.Setenv(profilingMutexRateEnvName, tc.mutexRateEnv)
|
||||
}
|
||||
err := tc.defaults.overrideWithEnv()
|
||||
assert.NoError(t, err)
|
||||
assert.Exactly(t, tc.expected, tc.defaults)
|
||||
|
@ -1,21 +1,28 @@
|
||||
package commands
|
||||
|
||||
import "github.com/urfave/cli/v2"
|
||||
import (
|
||||
"runtime"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
// flags for the grafana server command(s)
|
||||
var (
|
||||
ConfigFile string
|
||||
HomePath string
|
||||
PidFile string
|
||||
Packaging string
|
||||
ConfigOverrides string
|
||||
Version bool
|
||||
VerboseVersion bool
|
||||
Profile bool
|
||||
ProfileAddr string
|
||||
ProfilePort uint64
|
||||
Tracing bool
|
||||
TracingFile string
|
||||
ConfigFile string
|
||||
HomePath string
|
||||
PidFile string
|
||||
Packaging string
|
||||
ConfigOverrides string
|
||||
Version bool
|
||||
VerboseVersion bool
|
||||
Profile bool
|
||||
ProfileAddr string
|
||||
ProfilePort uint64
|
||||
ProfileBlockRate int
|
||||
ProfileMutexFraction int
|
||||
ProfileContention bool
|
||||
Tracing bool
|
||||
TracingFile string
|
||||
)
|
||||
|
||||
var commonFlags = []cli.Flag{
|
||||
@ -75,6 +82,18 @@ var commonFlags = []cli.Flag{
|
||||
Usage: "Define custom port for profiling",
|
||||
Destination: &ProfilePort,
|
||||
},
|
||||
&cli.IntFlag{
|
||||
Name: "profile-block-rate",
|
||||
Value: 1,
|
||||
Usage: "Controls the fraction of goroutine blocking events that are reported in the blocking profile. The profiler aims to sample an average of one blocking event per rate nanoseconds spent blocked. To turn off profiling entirely, use 0",
|
||||
Destination: &ProfileBlockRate,
|
||||
},
|
||||
&cli.IntFlag{
|
||||
Name: "profile-mutex-rate",
|
||||
Value: runtime.SetMutexProfileFraction(-1),
|
||||
Usage: "Controls the fraction of mutex contention events that are reported in the mutex profile. On average 1/rate events are reported. To turn off mutex profiling entirely, use 0",
|
||||
Destination: &ProfileMutexFraction,
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "tracing",
|
||||
Value: false,
|
||||
|
@ -54,7 +54,7 @@ func RunTargetServer(opts ServerOptions) error {
|
||||
}
|
||||
}()
|
||||
|
||||
if err := setupProfiling(Profile, ProfileAddr, ProfilePort); err != nil {
|
||||
if err := setupProfiling(Profile, ProfileAddr, ProfilePort, ProfileBlockRate, ProfileMutexFraction); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setupTracing(Tracing, TracingFile, logger); err != nil {
|
||||
|
Loading…
Reference in New Issue
Block a user