From 5be14eeda65516f5522c4c054528ddd376cde858 Mon Sep 17 00:00:00 2001 From: Jonathan Shook Date: Fri, 2 Apr 2021 10:34:07 -0500 Subject: [PATCH] docs improvements --- .../core/ActivityInstrumentation.java | 63 ++++++++++++++--- .../docs-for-nb/testing_practice/layers.puml | 8 +-- sort_docs/eb_metrics_full.puml | 70 +++++++++++++++++++ 3 files changed, 129 insertions(+), 12 deletions(-) create mode 100644 sort_docs/eb_metrics_full.puml diff --git a/engine-api/src/main/java/io/nosqlbench/engine/api/activityapi/core/ActivityInstrumentation.java b/engine-api/src/main/java/io/nosqlbench/engine/api/activityapi/core/ActivityInstrumentation.java index 2292aa7da..881628145 100644 --- a/engine-api/src/main/java/io/nosqlbench/engine/api/activityapi/core/ActivityInstrumentation.java +++ b/engine-api/src/main/java/io/nosqlbench/engine/api/activityapi/core/ActivityInstrumentation.java @@ -21,6 +21,8 @@ import com.codahale.metrics.Counter; import com.codahale.metrics.Histogram; import com.codahale.metrics.Timer; +import java.util.concurrent.Future; + /** * All the accessors of the metrics that will be used for each activity instance. * Implementors of this interface should ensure that the methods are synchronized @@ -33,13 +35,13 @@ public interface ActivityInstrumentation { /** * The input timer measures how long it takes to get the cycle value to be used for * an operation. - * @return A new or existing Timer + * @return a new or existing {@link Timer} */ Timer getOrCreateInputTimer(); /** * The strides service timer measures how long it takes to complete a stride of work. - * @return A new or existing Timer + * @return a new or existing {@link Timer} */ Timer getOrCreateStridesServiceTimer(); @@ -48,13 +50,13 @@ public interface ActivityInstrumentation { * time a stride should start to when it completed. Stride scheduling is only defined * when it is implied by a stride rate limiter, so this method should return null if * there is no strides rate limiter. 
- * @return A new or existing Timer if appropriate, else null + * @return a new or existing {@link Timer} if appropriate, else null */ Timer getStridesResponseTimerOrNull(); /** * The cycles service timer measures how long it takes to complete a cycle of work. - * @return A new or existing Timer + * @return a new or existing {@link Timer} */ Timer getOrCreateCyclesServiceTimer(); @@ -63,13 +65,13 @@ public interface ActivityInstrumentation { * time an operation should start to when it is completed. Cycle scheduling is only defined * when it is implied by a cycle rate limiter, so this method should return null if * there is no cycles rate limiter. - * @return A new or existing Timer if appropriate, else null + * @return a new or existing {@link Timer} if appropriate, else null */ Timer getCyclesResponseTimerOrNull(); /** * The phases service timer measures how long it takes to complete a phase of work. - * @return A new or existing Timer + * @return a new or existing {@link Timer} */ Timer getOrCreatePhasesServiceTimer(); @@ -78,26 +80,71 @@ public interface ActivityInstrumentation { * time a phase should start to when it is completed. Phase scheduling is only defined * when it is implied by a phase rate limiter, so this method should return null if * there is no phases rate limiter. - * @return A new or existing Timer if appropriate, else null + * @return a new or existing {@link Timer} if appropriate, else null */ Timer getPhasesResponseTimerOrNull(); /** * The pending ops counter keeps track of how many ops are submitted or in-flight, but * which haven't been completed yet. - * @return A new or existing Counter + * @return a new or existing {@link Counter} */ Counter getOrCreatePendingOpCounter(); Counter getOrCreateOpTrackerBlockedCounter(); + /** + * The bind timer keeps track of how long it takes for NoSQLBench to create an instance + * of an executable operation, given the cycle. 
This is usually done by using an + * {@link io.nosqlbench.engine.api.activityapi.planning.OpSequence} in conjunction with + * an {@link io.nosqlbench.engine.api.activityimpl.OpDispenser}. This is named for "binding + * a cycle to an operation". + * @return a new or existing {@link Timer} + */ Timer getOrCreateBindTimer(); + /** + * The execute timer keeps track of how long it takes to submit an operation to be executed + * to an underlying native driver. For asynchronous APIs, such as those which return a + * {@link Future}, this is simply the amount of time it takes to acquire the future. + * + * When possible, APIs should be used via their async methods, even if you are implementing + * a {@link SyncAction}. This allows the execute timer to measure the hand-off to the underlying API, + * and the result timer to measure the blocking calls to acquire the result. + * @return a new or existing {@link Timer} + */ Timer getOrCreateExecuteTimer(); + /** + * The result timer keeps track of how long it takes a native driver to service a request once submitted. + * This timer, in contrast to the result-success timer ({@link #getOrCreateResultSuccessTimer()}), + * is used to track all operations. That is, no matter + * whether the operation succeeds or not, it should be tracked with this timer. The scope of this timer should + * cover each attempt at an operation through a native driver. Retries are not to be combined in this measurement. + * @return a new or existing {@link Timer} + */ Timer getOrCreateResultTimer(); + /** + * The result-success timer keeps track of operations which had no exception. The measurements for this timer should + * be exactly the same values as used for the result timer ({@link #getOrCreateResultTimer()}), except that + * attempts to complete an operation which yield an exception should be excluded from the results. 
These two metrics + * together provide a very high level sanity check against the error-specific metrics which can be reported by + * the error handler logic. + * @return a new or existing {@link Timer} + */ Timer getOrCreateResultSuccessTimer(); + /** + * The tries histogram tracks how many tries it takes to complete an operation successfully, or not. This histogram + * does not encode whether operations were successful or not. Ideally, if every attempt to complete an operation succeeds + * on its first try, the data in this histogram should all be 1. In practice, systems which are running near their + * capacity will see a few retried operations, and systems that are substantially over-driven will see many retried + * operations. As the retries value increases the further down the percentile scale you go, you can detect system loading + * patterns which are in excess of the real-time capability of the target system. + * + * This metric should be measured around every retry loop for a native operation. + * @return a new or existing {@link Histogram} + */ Histogram getOrCreateTriesHistogram(); } diff --git a/engine-docs/src/main/resources/docs-for-nb/testing_practice/layers.puml b/engine-docs/src/main/resources/docs-for-nb/testing_practice/layers.puml index 511694f48..5ee274654 100644 --- a/engine-docs/src/main/resources/docs-for-nb/testing_practice/layers.puml +++ b/engine-docs/src/main/resources/docs-for-nb/testing_practice/layers.puml @@ -2,18 +2,18 @@ header Layers of Messaging hide footbox -box "User View" #clear +box "User View" #white actor Alice as user collections "User\nAgent" as agent collections "Client\nSide\nApp" as capp end box -box "WebApp View" #clear +box "WebApp View" #white boundary "App Server\nor node, ..." 
as sapp boundary "Database\nDriver" as driver end box -box "DB View" #clear +box "DB View" #white database DB as db boundary "Storage\nService" as store entity "Storage\nDevice" as device @@ -50,4 +50,4 @@ user <- agent: browser\nrenders page deactivate agent -@enduml \ No newline at end of file +@enduml diff --git a/sort_docs/eb_metrics_full.puml b/sort_docs/eb_metrics_full.puml new file mode 100644 index 000000000..3fca5d94e --- /dev/null +++ b/sort_docs/eb_metrics_full.puml @@ -0,0 +1,70 @@ +@startuml + +Participant Input as i +Participant Thread as t +Participant Action as a +Participant Activity as at +Participant "Native\nDriver" as d + +== acquire input == + +group TIMER read-input + t -> i : get segment(stride) + activate i + t <- i : [stride] + deactivate i +end + + group BLOCK striderate + t -> t: block until\nunthrottled + end + +group TIMER strides + + loop over cycle values in segment + + group BLOCK cyclerate + t -> t: block until\nunthrottled + end + + group TIMER cycle + t -> a : runCycle(cycle) + activate a + + group TIMER bind + a -> at: bind + activate at + + at -> d: ... [if needed] + at <- d: ... + a <- at: op + deactivate at + end + + group HISTOGRAM tries + loop over tries + + group TIMER execute + a --> d: execute + activate d + a <-- d: future + deactivate d + end + + group TIMERS result\n& result-success[result-success\nwhen no error] + a -> d: execute + activate d + a <- d: future + deactivate d + end + + end + + t <- a : result + deactivate a + end + end + end +end # strides + +@enduml