docs improvements

2025-02-25 18:55:28 -06:00 · 2021-04-02 10:34:07 -05:00
parent 7997c62148
commit 5be14eeda6
3 changed files with 129 additions and 12 deletions
--- a/engine-api/src/main/java/io/nosqlbench/engine/api/activityapi/core/ActivityInstrumentation.java
+++ b/engine-api/src/main/java/io/nosqlbench/engine/api/activityapi/core/ActivityInstrumentation.java
@@ -21,6 +21,8 @@ import com.codahale.metrics.Counter;
 import com.codahale.metrics.Histogram;
 import com.codahale.metrics.Timer;

+import java.util.concurrent.Future;
+
 /**
 * All the accessors of the metrics that will be used for each activity instance.
 * Implementors of this interface should ensure that the methods are synchronized
@@ -33,13 +35,13 @@ public interface ActivityInstrumentation {
    /**
     * The input timer measures how long it takes to get the cycle value to be used for
     * an operation.
-     * @return A new or existing Timer
+     * @return a new or existing {@link Timer}
     */
    Timer getOrCreateInputTimer();

    /**
     * The strides service timer measures how long it takes to complete a stride of work.
-     * @return A new or existing Timer
+     * @return a new or existing {@link Timer}
     */
    Timer getOrCreateStridesServiceTimer();

@@ -48,13 +50,13 @@ public interface ActivityInstrumentation {
     * time a stride should start to when it completed. Stride scheduling is only defined
     * when it is implied by a stride rate limiter, so this method should return null if
     * there is no strides rate limiter.
-     * @return A new or existing Timer if appropriate, else null
+     * @return a new or existing {@link Timer} if appropriate, else null
     */
    Timer getStridesResponseTimerOrNull();

    /**
     * The cycles service timer measures how long it takes to complete a cycle of work.
-     * @return A new or existing Timer
+     * @return a new or existing {@link Timer}
     */
    Timer getOrCreateCyclesServiceTimer();

@@ -63,13 +65,13 @@ public interface ActivityInstrumentation {
     * time an operation should start to when it is completed. Cycle scheduling is only defined
     * when it is implied by a cycle rate limiter, so this method should return null if
     * there is no cycles rate limiter.
-     * @return A new or existing Timer if appropriate, else null
+     * @return a new or existing {@link Timer} if appropriate, else null
     */
    Timer getCyclesResponseTimerOrNull();

    /**
     * The phases service timer measures how long it takes to complete a phase of work.
-     * @return A new or existing Timer
+     * @return a new or existing {@link Timer}
     */
    Timer getOrCreatePhasesServiceTimer();

@@ -78,26 +80,71 @@ public interface ActivityInstrumentation {
     * time a phase should start to when it is completed. Phase scheduling is only defined
     * when it is implied by a phase rate limiter, so this method should return null if
     * there is no phases rate limiter.
-     * @return A new or existing Timer if appropriate, else null
+     * @return a new or existing {@link Timer} if appropriate, else null
     */
    Timer getPhasesResponseTimerOrNull();

    /**
     * The pending ops counter keeps track of how many ops are submitted or in-flight, but
     * which haven't been completed yet.
-     * @return A new or existing Counter
+     * @return a new or existing {@link Counter}
     */
    Counter getOrCreatePendingOpCounter();

    Counter getOrCreateOpTrackerBlockedCounter();

+    /**
+     * The bind timer keeps track of how long it takes for NoSQLBench to create an instance
+     * of an executable operation, given the cycle. This is usually done by using an
+     * {@link io.nosqlbench.engine.api.activityapi.planning.OpSequence} in conjunction with
+     * an {@link io.nosqlbench.engine.api.activityimpl.OpDispenser}. This is named for "binding
+     * a cycle to an operation".
+     * @return a new or existing {@link Timer}
+     */
    Timer getOrCreateBindTimer();

+    /**
+     * The execute timer keeps track of how long it takes to submit an operation to be executed
+     * to an underlying native driver. For asynchronous APIs, such as those which return a
+     * {@link Future}, this is simply the amount of time it takes to acquire the future.
+     * <p>
+     * When possible, APIs should be used via their async methods, even if you are implementing
+     * a {@link SyncAction}. This allows the execute timer to measure the hand-off to the underlying API,
+     * and the result timer to measure the blocking calls to acquire the result.
+     * @return a new or existing {@link Timer}
+     */
    Timer getOrCreateExecuteTimer();

+    /**
+     * The result timer keeps track of how long it takes a native driver to service a request once submitted.
+     * This timer, in contrast to the result-success timer ({@link #getOrCreateResultSuccessTimer()}),
+     * is used to track all operations. That is, no matter
+     * whether the operation succeeds or not, it should be tracked with this timer. The scope of this timer should
+     * cover each attempt at an operation through a native driver. Retries are not to be combined in this measurement.
+     * @return a new or existing {@link Timer}
+     */
    Timer getOrCreateResultTimer();

+    /**
+     * The result-success timer keeps track of operations which had no exception. The measurements for this timer should
+     * be exactly the same values as used for the result timer ({@link #getOrCreateResultTimer()}), except that
+     * attempts to complete an operation which yield an exception should be excluded from the results. These two metrics
+     * together provide a very high level sanity check against the error-specific metrics which can be reported by
+     * the error handler logic.
+     * @return a new or existing {@link Timer}
+     */
    Timer getOrCreateResultSuccessTimer();

+    /**
+     * The tries histogram tracks how many tries it takes to complete an operation successfully, or not. This histogram
+     * does not encode whether operations were successful or not. Ideally, if every attempt to complete an operation succeeds
+     * on its first try, the data in this histogram should all be 1. In practice, systems which are running near their
+     * capacity will see a few retried operations, and systems that are substantially over-driven will see many retried
+     * operations. As the retries value increases the further down the percentile scale you go, you can detect system loading
+     * patterns which are in excess of the real-time capability of the target system.
+     * <p>
+     * This metric should be measured around every retry loop for a native operation.
+     * @return a new or existing {@link Histogram}
+     */
    Histogram getOrCreateTriesHistogram();
 }
--- a/engine-docs/src/main/resources/docs-for-nb/testing_practice/layers.puml
+++ b/engine-docs/src/main/resources/docs-for-nb/testing_practice/layers.puml
@@ -2,18 +2,18 @@
 header Layers of Messaging
 hide footbox

-box "User View" #clear
+box "User View" #white
 actor Alice as user
 collections "User\nAgent" as agent
 collections "Client\nSide\nApp" as capp
 end box

-box "WebApp View" #clear
+box "WebApp View" #white
 boundary "App Server\nor node, ..." as sapp
 boundary "Database\nDriver" as driver
 end box

-box "DB View" #clear
+box "DB View" #white
 database DB as db
 boundary "Storage\nService" as store
 entity "Storage\nDevice" as device
@@ -50,4 +50,4 @@ user <- agent: browser\nrenders page
 deactivate agent


-@enduml
+@enduml
--- a/sort_docs/eb_metrics_full.puml
+++ b/sort_docs/eb_metrics_full.puml
@@ -0,0 +1,70 @@
+@startuml
+
+Participant Input as i
+Participant Thread as t
+Participant Action as a
+Participant Activity as at
+Participant "Native\nDriver" as d
+
+== acquire input ==
+
+group TIMER read-input
+ t -> i : get segment(stride)
+ activate i
+ t <- i : <cycle segment>[stride]
+ deactivate i
+end
+
+  group BLOCK striderate
+  t -> t: block until\nunthrottled
+  end
+
+group TIMER strides
+
+  loop over cycle values in segment
+
+    group BLOCK cyclerate
+     t -> t: block until\nunthrottled
+    end
+
+    group TIMER cycle
+      t -> a : runCycle(cycle)
+      activate a
+
+      group TIMER bind
+        a -> at: bind
+        activate at
+
+        at -> d: ... [if needed]
+        at <- d: ...
+        a <- at: op
+        deactivate at
+      end
+
+      group HISTOGRAM tries
+        loop over tries
+
+          group TIMER execute
+            a --> d: execute
+            activate d
+            a <-- d: future
+            deactivate d
+          end
+
+          group TIMERS result\n& result-success[result-success\nwhen no error]
+            a -> d: execute
+            activate d
+            a <- d: future
+            deactivate d
+          end
+
+        end
+
+        t <- a : result
+        deactivate a
+      end
+    end
+  end
+end # strides
+
+@enduml