docker: updated our graphite docker container

This commit is contained in:
Torkel Ödegaard
2017-10-11 16:40:15 +02:00
parent 4c8310c2bf
commit 3471e262a5
19 changed files with 484 additions and 452 deletions

View File

@@ -8,18 +8,18 @@
# Defaults to ../
# GRAPHITE_CONF_DIR - Configuration directory (where this file lives).
# Defaults to $GRAPHITE_ROOT/conf/
# GRAPHITE_STORAGE_DIR - Storage directory for whipser/rrd/log/pid files.
# GRAPHITE_STORAGE_DIR - Storage directory for whisper/rrd/log/pid files.
# Defaults to $GRAPHITE_ROOT/storage/
#
# To change other directory paths, add settings to this file. The following
# configuration variables are available with these default values:
#
# STORAGE_DIR = $GRAPHITE_STORAGE_DIR
# LOCAL_DATA_DIR = STORAGE_DIR/whisper/
# WHITELISTS_DIR = STORAGE_DIR/lists/
# CONF_DIR = STORAGE_DIR/conf/
# LOG_DIR = STORAGE_DIR/log/
# PID_DIR = STORAGE_DIR/
# LOCAL_DATA_DIR = %(STORAGE_DIR)s/whisper/
# WHITELISTS_DIR = %(STORAGE_DIR)s/lists/
# CONF_DIR = %(STORAGE_DIR)s/conf/
# LOG_DIR = %(STORAGE_DIR)s/log/
# PID_DIR = %(STORAGE_DIR)s/
#
# For FHS style directory structures, use:
#
@@ -30,20 +30,30 @@
#
#LOCAL_DATA_DIR = /opt/graphite/storage/whisper/
# Enable daily log rotation. If disabled, a kill -HUP can be used after a manual rotate
# Specify the database library used to store metric data on disk. Each database
# may have configurable options to change the behaviour of how it writes to
# persistent storage.
#
# whisper - Fixed-size database, similar in design and purpose to RRD. This is
# the default storage backend for carbon and the most rigorously tested.
#
# ceres - Experimental alternative database that supports storing data in sparse
# files of arbitrary fixed-size resolutions.
DATABASE = whisper
# Enable daily log rotation. If disabled, a new file will be opened whenever the log file path no
# longer exists (i.e. it is removed or renamed)
ENABLE_LOGROTATION = True
# Specify the user to drop privileges to
# If this is blank carbon runs as the user that invokes it
# If this is blank carbon-cache runs as the user that invokes it
# This user must have write access to the local data directory
USER =
#
# NOTE: The above settings must be set under [relay] and [aggregator]
# to take effect for those daemons as well
# Limit the size of the cache to avoid swapping or becoming CPU bound.
# Sorting and serving cache queries get more expensive as the cache grows.
# Use the value "inf" (infinity) for an unlimited cache size.
# value should be an integer number of metric datapoints.
MAX_CACHE_SIZE = inf
# Limits the number of whisper update_many() calls per second, which effectively
@@ -60,14 +70,30 @@ MAX_UPDATES_PER_SECOND = 500
# MAX_UPDATES_PER_SECOND_ON_SHUTDOWN = 1000
# Softly limits the number of whisper files that get created each minute.
# Setting this value low (like at 50) is a good way to ensure your graphite
# Setting this value low (e.g. 50) is a good way to ensure that your carbon
# system will not be adversely impacted when a bunch of new metrics are
# sent to it. The trade off is that it will take much longer for those metrics'
# database files to all get created and thus longer until the data becomes usable.
# Setting this value high (like "inf" for infinity) will cause graphite to create
# the files quickly but at the risk of slowing I/O down considerably for a while.
# sent to it. The trade off is that any metrics received in excess of this
# value will be silently dropped, and the whisper file will not be created
# until such point as a subsequent metric is received and fits within the
# defined rate limit. Setting this value high (like "inf" for infinity) will
# cause carbon to create the files quickly but at the risk of increased I/O.
MAX_CREATES_PER_MINUTE = 50
# Set the minimum timestamp resolution supported by this instance. This allows
# internal optimisations by overwriting points with equal truncated timestamps
# in order to limit the number of updates to the database. It defaults to one
# second.
MIN_TIMESTAMP_RESOLUTION = 1
# Set the minimum lag in seconds for a point to be written to the database
# in order to optimize batching. This means that each point will wait at least
# the duration of this lag before being written. Setting this to 0 disables the feature.
# This currently only works when using the timesorted write strategy.
# MIN_TIMESTAMP_LAG = 0
# Set the interface and port for the line (plain text) listener. Setting the
# interface to 0.0.0.0 listens on all interfaces. Port can be set to 0 to
# disable this listener if it is not required.
LINE_RECEIVER_INTERFACE = 0.0.0.0
LINE_RECEIVER_PORT = 2003
@@ -78,11 +104,23 @@ ENABLE_UDP_LISTENER = False
UDP_RECEIVER_INTERFACE = 0.0.0.0
UDP_RECEIVER_PORT = 2003
# Set the interface and port for the pickle listener. Setting the interface to
# 0.0.0.0 listens on all interfaces. Port can be set to 0 to disable this
# listener if it is not required.
PICKLE_RECEIVER_INTERFACE = 0.0.0.0
PICKLE_RECEIVER_PORT = 2004
# Set to false to disable logging of successful connections
LOG_LISTENER_CONNECTIONS = True
# Set the interface and port for the protobuf listener. Setting the interface to
# 0.0.0.0 listens on all interfaces. Port can be set to 0 to disable this
# listener if it is not required.
# PROTOBUF_RECEIVER_INTERFACE = 0.0.0.0
# PROTOBUF_RECEIVER_PORT = 2005
# Limit the number of open connections the receiver can handle at any time.
# Default is no limit. Setting up a limit for sites handling high volume
# traffic may be recommended to avoid running out of TCP memory or having
# thousands of TCP connections reduce the throughput of the service.
#MAX_RECEIVER_CONNECTIONS = inf
# Per security concerns outlined in Bug #817247 the pickle receiver
# will use a more secure and slightly less efficient unpickler.
@@ -98,13 +136,19 @@ CACHE_QUERY_PORT = 7002
# data until the cache size falls below 95% MAX_CACHE_SIZE.
USE_FLOW_CONTROL = True
# By default, carbon-cache will log every whisper update and cache hit. This can be excessive and
# degrade performance if logging on the same volume as the whisper data is stored.
LOG_UPDATES = False
LOG_CACHE_HITS = False
LOG_CACHE_QUEUE_SORTS = True
# If enabled this setting is used to timeout metric client connection if no
# metrics have been sent in specified time in seconds
#METRIC_CLIENT_IDLE_TIMEOUT = None
# The thread that writes metrics to disk can use on of the following strategies
# By default, carbon-cache will log every whisper update and cache hit.
# This can be excessive and degrade performance if logging on the same
# volume as the whisper data is stored.
LOG_UPDATES = False
LOG_CREATES = False
LOG_CACHE_HITS = False
LOG_CACHE_QUEUE_SORTS = False
# The thread that writes metrics to disk can use one of the following strategies
# determining the order in which metrics are removed from cache and flushed to
# disk. The default option preserves the same behavior as has been historically
# available in version 0.9.10.
@@ -114,6 +158,12 @@ LOG_CACHE_QUEUE_SORTS = True
# moment of the list's creation. Metrics will then be flushed from the cache to
# disk in that order.
#
# timesorted - All metrics in the list will be looked at and sorted according
# to the timestamp of their datapoints. The metrics that were the least recently
# written will be written first. This is a hybrid strategy between max and
# sorted which is particularly adapted to sets of metrics with non-uniform
# resolutions.
#
# max - The writer thread will always pop and flush the metric from cache
# that has the most datapoints. This will give a strong flush preference to
# frequently updated metrics and will also reduce random file-io. Infrequently
@@ -152,12 +202,61 @@ WHISPER_FALLOCATE_CREATE = True
# Enabling this option will cause Whisper to lock each Whisper file it writes
# to with an exclusive lock (LOCK_EX, see: man 2 flock). This is useful when
# multiple carbon-cache daemons are writing to the same files
# multiple carbon-cache daemons are writing to the same files.
# WHISPER_LOCK_WRITES = False
# On systems which have a large number of metrics, the amount of Whisper write(2)'s
# pageback sometimes causes disk thrashing due to memory shortage, so that abnormal
# disk reads occur. Enabling this option makes it possible to decrease useless
# page cache memory by posix_fadvise(2) with POSIX_FADVISE_RANDOM option.
# WHISPER_FADVISE_RANDOM = False
# By default all nodes stored in Ceres are cached in memory to improve the
# throughput of reads and writes to underlying slices. Turning this off will
# greatly reduce memory consumption for databases with millions of metrics, at
# the cost of a steep increase in disk i/o, approximately an extra two os.stat
# calls for every read and write. Reasons to do this are if the underlying
# storage can handle stat() with practically zero cost (SSD, NVMe, zRAM).
# Valid values are:
# all - all nodes are cached
# none - node caching is disabled
# CERES_NODE_CACHING_BEHAVIOR = all
# Ceres nodes can have many slices and caching the right ones can improve
# performance dramatically. Note that there are many trade-offs to tinkering
# with this, and unless you are a ceres developer you *really* should not
# mess with this. Valid values are:
# latest - only the most recent slice is cached
# all - all slices are cached
# none - slice caching is disabled
# CERES_SLICE_CACHING_BEHAVIOR = latest
# If a Ceres node accumulates too many slices, performance can suffer.
# This can be caused by intermittently reported data. To mitigate
# slice fragmentation there is a tolerance for how much space can be
# wasted within a slice file to avoid creating a new one. That tolerance
# level is determined by MAX_SLICE_GAP, which is the number of consecutive
# null datapoints allowed in a slice file.
# If you set this very low, you will waste less of the *tiny* bit of disk space
# that this feature wastes, and you will be prone to performance problems
# caused by slice fragmentation, which can be pretty severe.
# If you set this really high, you will waste a bit more disk space (each
# null datapoint wastes 8 bytes, but keep in mind your filesystem's block
# size). If you suffer slice fragmentation issues, you should increase this or
# run the ceres-maintenance defrag plugin more often. However you should not
# set it to be huge because then if a large but allowed gap occurs it has to
# get filled in, which means instead of a simple 8-byte write to a new file we
# could end up doing an (8 * MAX_SLICE_GAP)-byte write to the latest slice.
# CERES_MAX_SLICE_GAP = 80
# Enabling this option will cause Ceres to lock each Ceres file it writes to
# with an exclusive lock (LOCK_EX, see: man 2 flock). This is useful when
# multiple carbon-cache daemons are writing to the same files.
# CERES_LOCK_WRITES = False
# Set this to True to enable whitelisting and blacklisting of metrics in
# CONF_DIR/whitelist and CONF_DIR/blacklist. If the whitelist is missing or
# empty, all metrics will pass through
# CONF_DIR/whitelist.conf and CONF_DIR/blacklist.conf. If the whitelist is
# missing or empty, all metrics will pass through
# USE_WHITELIST = False
# By default, carbon itself will log statistics (such as a count,
@@ -203,16 +302,25 @@ WHISPER_FALLOCATE_CREATE = True
# Example: store everything
# BIND_PATTERNS = #
# URL of graphite-web instance, this is used to add incoming series to the tag database
GRAPHITE_URL = http://127.0.0.1:80
# Tag update interval, this specifies how frequently updates to existing series will trigger
# an update to the tag index, the default setting is once every 100 updates
# TAG_UPDATE_INTERVAL = 100
# To configure special settings for the carbon-cache instance 'b', uncomment this:
#[cache:b]
#LINE_RECEIVER_PORT = 2103
#PICKLE_RECEIVER_PORT = 2104
#CACHE_QUERY_PORT = 7102
# and any other settings you want to customize, defaults are inherited
# from [carbon] section.
# from the [cache] section.
# You can then specify the --instance=b option to manage this instance
#
# In order to turn off logging of successful connections for the line
# receiver, set this to False
# LOG_LISTENER_CONN_SUCCESS = True
[relay]
LINE_RECEIVER_INTERFACE = 0.0.0.0
@@ -220,9 +328,6 @@ LINE_RECEIVER_PORT = 2013
PICKLE_RECEIVER_INTERFACE = 0.0.0.0
PICKLE_RECEIVER_PORT = 2014
# Set to false to disable logging of successful connections
LOG_LISTENER_CONNECTIONS = True
# Carbon-relay has several options for metric routing controlled by RELAY_METHOD
#
# Use relay-rules.conf to route metrics to destinations based on pattern rules
@@ -237,12 +342,24 @@ LOG_LISTENER_CONNECTIONS = True
# instance.
# Enable this for carbon-relays that send to a group of carbon-aggregators
#RELAY_METHOD = aggregated-consistent-hashing
#
# You can also use fast-hashing and fast-aggregated-hashing which are in O(1)
# and will always redirect the metrics to the same destination but do not try
# to minimize rebalancing when the list of destinations is changing.
RELAY_METHOD = rules
# If you use consistent-hashing you can add redundancy by replicating every
# datapoint to more than one machine.
REPLICATION_FACTOR = 1
# For REPLICATION_FACTOR >=2, set DIVERSE_REPLICAS to True to guarantee replicas
# across distributed hosts. With this setting disabled, it's possible that replicas
# may be sent to different caches on the same host. This has been the default
# behavior since introduction of 'consistent-hashing' relay method.
# Note that enabling this on an existing pre-0.9.14 cluster will require rebalancing
# your metrics across the cluster nodes using a tool like Carbonate.
#DIVERSE_REPLICAS = True
# This is a list of carbon daemons we will send any relayed or
# generated metrics to. The default provided would send to a single
# carbon-cache instance on the default port. However if you
@@ -261,20 +378,71 @@ REPLICATION_FACTOR = 1
# must be defined in this list
DESTINATIONS = 127.0.0.1:2004
# This defines the maximum "message size" between carbon daemons.
# You shouldn't need to tune this unless you really know what you're doing.
MAX_DATAPOINTS_PER_MESSAGE = 500
# This defines the protocol to use to contact the destination. It can be
# set to one of "line", "pickle", "udp" and "protobuf". This list can be
# extended with CarbonClientFactory plugins and defaults to "pickle".
# DESTINATION_PROTOCOL = pickle
# When using consistent hashing it sometimes makes sense to make
# the ring dynamic when you don't want to lose points when a
# single destination is down. Replication is an answer to that
# but it can be quite expensive.
# DYNAMIC_ROUTER = False
# Controls the number of connection attempts before marking a
# destination as down. We usually do one connection attempt per
# second.
# DYNAMIC_ROUTER_MAX_RETRIES = 5
# This is the maximum number of datapoints that can be queued up
# for a single destination. Once this limit is hit, we will
# stop accepting new data if USE_FLOW_CONTROL is True, otherwise
# we will drop any subsequently received datapoints.
MAX_QUEUE_SIZE = 10000
# This defines the maximum "message size" between carbon daemons. If
# your queue is large, setting this to a lower number will cause the
# relay to forward smaller discrete chunks of stats, which may prevent
# overloading on the receiving side after a disconnect.
MAX_DATAPOINTS_PER_MESSAGE = 500
# Limit the number of open connections the receiver can handle at any time.
# Default is no limit. Setting up a limit for sites handling high volume
# traffic may be recommended to avoid running out of TCP memory or having
# thousands of TCP connections reduce the throughput of the service.
#MAX_RECEIVER_CONNECTIONS = inf
# Specify the user to drop privileges to
# If this is blank carbon-relay runs as the user that invokes it
# USER =
# This is the percentage that the queue must be empty before it will accept
# more messages. For a larger site, if the queue is very large it makes sense
# to tune this to allow for incoming stats. So if you have an average
# flow of 100k stats/minute, and a MAX_QUEUE_SIZE of 3,000,000, it makes sense
# to allow stats to start flowing when you've cleared the queue to 95% since
# you should have space to accommodate the next minute's worth of stats
# even before the relay incrementally clears more of the queue
QUEUE_LOW_WATERMARK_PCT = 0.8
# To allow for batch efficiency from the pickle protocol and to benefit from
# other batching advantages, all writes are deferred by putting them into a queue,
# and then the queue is flushed and sent a small fraction of a second later.
TIME_TO_DEFER_SENDING = 0.0001
# Set this to False to drop datapoints when any send queue (sending datapoints
# to a downstream carbon daemon) hits MAX_QUEUE_SIZE. If this is True (the
# default) then sockets over which metrics are received will temporarily stop accepting
# data until the send queues fall below 80% MAX_QUEUE_SIZE.
# data until the send queues fall below QUEUE_LOW_WATERMARK_PCT * MAX_QUEUE_SIZE.
USE_FLOW_CONTROL = True
# If enabled this setting is used to timeout metric client connection if no
# metrics have been sent in specified time in seconds
#METRIC_CLIENT_IDLE_TIMEOUT = None
# Set this to True to enable whitelisting and blacklisting of metrics in
# CONF_DIR/whitelist and CONF_DIR/blacklist. If the whitelist is missing or
# empty, all metrics will pass through
# CONF_DIR/whitelist.conf and CONF_DIR/blacklist.conf. If the whitelist is
# missing or empty, all metrics will pass through
# USE_WHITELIST = False
# By default, carbon itself will log statistics (such as a count,
@@ -282,7 +450,40 @@ USE_FLOW_CONTROL = True
# seconds. Set CARBON_METRIC_INTERVAL to 0 to disable instrumentation
# CARBON_METRIC_PREFIX = carbon
# CARBON_METRIC_INTERVAL = 60
#
# In order to turn off logging of successful connections for the line
# receiver, set this to False
# LOG_LISTENER_CONN_SUCCESS = True
# If you're connecting from the relay to a destination that's over the
# internet or similarly iffy connection, a backlog can develop because
# of internet weather conditions, e.g. acks getting lost or similar issues.
# To deal with that, you can enable USE_RATIO_RESET which will let you
# re-set the connection to an individual destination. Defaults to being off.
USE_RATIO_RESET=False
# When there is a small number of stats flowing, it's not desirable to
# perform any actions based on percentages - it's just too "twitchy".
MIN_RESET_STAT_FLOW=1000
# When the ratio of stats being sent in a reporting interval is far
# enough from 1.0, we will disconnect the socket and reconnect to
# clear out queued stats. The default ratio of 0.9 indicates that 10%
# of stats aren't being delivered within one CARBON_METRIC_INTERVAL
# (default of 60 seconds), which can lead to a queue backup. Under
# some circumstances re-setting the connection can fix this, so
# set this according to your tolerance, and look in the logs for
# "resetConnectionForQualityReasons" to observe whether this is kicking
# in when your sent queue is building up.
MIN_RESET_RATIO=0.9
# The minimum time between resets. When a connection is re-set, we
# need to wait before another reset is performed.
# (2*CARBON_METRIC_INTERVAL) + 1 second is the minimum time needed
# before stats for the new connection will be available. Setting this
# below (2*CARBON_METRIC_INTERVAL) + 1 second will result in a lot of
# reset connections for no good reason.
MIN_RESET_INTERVAL=121
[aggregator]
LINE_RECEIVER_INTERFACE = 0.0.0.0
@@ -291,14 +492,17 @@ LINE_RECEIVER_PORT = 2023
PICKLE_RECEIVER_INTERFACE = 0.0.0.0
PICKLE_RECEIVER_PORT = 2024
# Set to false to disable logging of successful connections
LOG_LISTENER_CONNECTIONS = True
# If set true, metric received will be forwarded to DESTINATIONS in addition to
# the output of the aggregation rules. If set false the carbon-aggregator will
# only ever send the output of aggregation.
FORWARD_ALL = True
# Filenames of the configuration files to use for this instance of aggregator.
# Filenames are relative to CONF_DIR.
#
# AGGREGATION_RULES = aggregation-rules.conf
# REWRITE_RULES = rewrite-rules.conf
# This is a list of carbon daemons we will send any relayed or
# generated metrics to. The default provided would send to a single
# carbon-cache instance on the default port. However if you
@@ -330,6 +534,10 @@ MAX_QUEUE_SIZE = 10000
# data until the send queues fall below 80% MAX_QUEUE_SIZE.
USE_FLOW_CONTROL = True
# If enabled this setting is used to timeout metric client connection if no
# metrics have been sent in specified time in seconds
#METRIC_CLIENT_IDLE_TIMEOUT = None
# This defines the maximum "message size" between carbon daemons.
# You shouldn't need to tune this unless you really know what you're doing.
MAX_DATAPOINTS_PER_MESSAGE = 500
@@ -339,6 +547,12 @@ MAX_DATAPOINTS_PER_MESSAGE = 500
# the past MAX_AGGREGATION_INTERVALS * intervalSize seconds.
MAX_AGGREGATION_INTERVALS = 5
# Limit the number of open connections the receiver can handle at any time.
# Default is no limit. Setting up a limit for sites handling high volume
# traffic may be recommended to avoid running out of TCP memory or having
# thousands of TCP connections reduce the throughput of the service.
#MAX_RECEIVER_CONNECTIONS = inf
# By default (WRITE_BACK_FREQUENCY = 0), carbon-aggregator will write back
# aggregated data points once every rule.frequency seconds, on a per-rule basis.
# Set this (WRITE_BACK_FREQUENCY = N) to write back all aggregated data points
@@ -348,8 +562,8 @@ MAX_AGGREGATION_INTERVALS = 5
# WRITE_BACK_FREQUENCY = 0
# Set this to True to enable whitelisting and blacklisting of metrics in
# CONF_DIR/whitelist and CONF_DIR/blacklist. If the whitelist is missing or
# empty, all metrics will pass through
# CONF_DIR/whitelist.conf and CONF_DIR/blacklist.conf. If the whitelist is
# missing or empty, all metrics will pass through
# USE_WHITELIST = False
# By default, carbon itself will log statistics (such as a count,
@@ -357,3 +571,24 @@ MAX_AGGREGATION_INTERVALS = 5
# seconds. Set CARBON_METRIC_INTERVAL to 0 to disable instrumentation
# CARBON_METRIC_PREFIX = carbon
# CARBON_METRIC_INTERVAL = 60
# In order to turn off logging of successful connections for the line
# receiver, set this to False
# LOG_LISTENER_CONN_SUCCESS = True
# In order to turn off logging of metrics with no corresponding
# aggregation rules, set this to False
# LOG_AGGREGATOR_MISSES = False
# Specify the user to drop privileges to
# If this is blank carbon-aggregator runs as the user that invokes it
# USER =
# Parts of the code, and particularly aggregator rules, need
# to cache metric names. To avoid leaking too much memory you
# can tweak the size of this cache. The default allows for 1M
# different metrics per rule (~200MiB).
# CACHE_METRIC_NAMES_MAX=1000000
# You can optionally set a ttl to this cache.
# CACHE_METRIC_NAMES_TTL=600

View File

@@ -40,4 +40,3 @@ aggregationMethod = sum
pattern = .*
xFilesFactor = 0.3
aggregationMethod = average

View File

@@ -1,4 +1,23 @@
# Schema definitions for Whisper files. Entries are scanned in order,
# and first match wins. This file is scanned for changes every 60 seconds.
#
# Definition Syntax:
#
# [name]
# pattern = regex
# retentions = timePerPoint:timeToStore, timePerPoint:timeToStore, ...
#
# Remember: To support accurate aggregation from higher to lower resolution
# archives, the precision of a longer retention archive must be
# cleanly divisible by precision of next lower retention archive.
#
# Valid: 60s:7d,300s:30d (300/60 = 5)
# Invalid: 180s:7d,300s:30d (300/180 = 1.667)
#
# Carbon's internal metrics. This entry should match what is specified in
# CARBON_METRIC_PREFIX and CARBON_METRIC_INTERVAL settings
[carbon]
pattern = ^carbon\..*
retentions = 1m:31d,10m:1y,1h:5y