grafana/pkg/server/instrumentation_service.go
owensmallwood 37d39de36d
Storage Api: Add metrics (#85316)
* Storage server runs own instrumentation server if its the sole target. Starts adding some sample metrics for now.

* adds metric for failed optimistic locks

* refactors metrics registration to own method on service for testability. Adds tests.

* Register sql storage server metrics from within the service

* fixes test

* troubleshooting drone test failures. Maybe timing when starting instrumentation server?

* Waits until instrumentation server has started. Updates tests.

* defer wont get called unless theres an error. removing.

* wait for instrumentation server to be running

* linter - close res body

* use port 3000 for metrics and removes test metric inc() call

* fixes test - updates port

* refactors module server to provide an instrumentation server module when there is no ALL or CORE target provided and running as single target

* make instrumentation server a dependency of all modules that do not run their own http server

* adds module server test

* adds tests for instrumentation service and removes old tests that aren't needed

* ignore error in test

* uses helper to start and run service

* when running wait on ctx done or http server err

* wait for http server

* removes println

* updates module server test to be integration test

* require no error in goroutine

* skips integration test when GRAFANA_TEST_DB not defined

* move http server start into start, verify returned content

* make test error when run fails

* try waiting longer and see if drone tests pass

* update integration test mysql creds to match drone

* go back to only waiting half second

* debug log drone mysql connection string

* use same db connection config as drone

* try using same hostname as drone

* cant use localhost as mysql hostname in drone tests. Need to parse it from the cfg db connection string

---------

Co-authored-by: Dan Cech <dcech@grafana.com>
2024-04-08 08:35:01 -06:00

69 lines
1.7 KiB
Go

package server
import (
"context"
"net"
"net/http"
"time"
"github.com/grafana/dskit/services"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
type instrumentationService struct {
*services.BasicService
httpServ *http.Server
log log.Logger
errChan chan error
}
func NewInstrumentationService(log log.Logger) (*instrumentationService, error) {
s := &instrumentationService{log: log}
s.BasicService = services.NewBasicService(s.start, s.running, s.stop)
return s, nil
}
func (s *instrumentationService) start(ctx context.Context) error {
s.httpServ = s.newInstrumentationServer(ctx)
s.errChan = make(chan error)
go func() {
s.errChan <- s.httpServ.ListenAndServe()
}()
return nil
}
func (s *instrumentationService) running(ctx context.Context) error {
select {
case <-ctx.Done():
return nil
case err := <-s.errChan:
return err
}
}
func (s *instrumentationService) stop(failureReason error) error {
s.log.Info("stopping instrumentation server", "reason", failureReason)
if err := s.httpServ.Shutdown(context.Background()); err != nil {
s.log.Error("failed to shutdown instrumentation server", "error", err)
return err
}
return nil
}
func (s *instrumentationService) newInstrumentationServer(ctx context.Context) *http.Server {
router := http.NewServeMux()
router.Handle("/metrics", promhttp.Handler())
srv := &http.Server{
// 5s timeout for header reads to avoid Slowloris attacks (https://thetooth.io/blog/slowloris-attack/)
ReadHeaderTimeout: 5 * time.Second,
Addr: ":3000", // TODO - make configurable?
Handler: router,
BaseContext: func(_ net.Listener) context.Context { return ctx },
}
return srv
}