From 5be14eeda65516f5522c4c054528ddd376cde858 Mon Sep 17 00:00:00 2001
From: Jonathan Shook <jshook@gmail.com>
Date: Fri, 2 Apr 2021 10:34:07 -0500
Subject: [PATCH] docs improvements

---
 .../core/ActivityInstrumentation.java         | 63 ++++++++++++++---
 .../docs-for-nb/testing_practice/layers.puml  |  8 +--
 sort_docs/eb_metrics_full.puml                | 70 +++++++++++++++++++
 3 files changed, 129 insertions(+), 12 deletions(-)
 create mode 100644 sort_docs/eb_metrics_full.puml

diff --git a/engine-api/src/main/java/io/nosqlbench/engine/api/activityapi/core/ActivityInstrumentation.java b/engine-api/src/main/java/io/nosqlbench/engine/api/activityapi/core/ActivityInstrumentation.java
index 2292aa7da..881628145 100644
--- a/engine-api/src/main/java/io/nosqlbench/engine/api/activityapi/core/ActivityInstrumentation.java
+++ b/engine-api/src/main/java/io/nosqlbench/engine/api/activityapi/core/ActivityInstrumentation.java
@@ -21,6 +21,8 @@ import com.codahale.metrics.Counter;
 import com.codahale.metrics.Histogram;
 import com.codahale.metrics.Timer;
 
+import java.util.concurrent.Future;
+
 /**
  * All the accessors of the metrics that will be used for each activity instance.
  * Implementors of this interface should ensure that the methods are synchronized
@@ -33,13 +35,13 @@ public interface ActivityInstrumentation {
     /**
      * The input timer measures how long it takes to get the cycle value to be used for
      * an operation.
-     * @return A new or existing Timer
+     * @return a new or existing {@link Timer}
      */
     Timer getOrCreateInputTimer();
 
     /**
      * The strides service timer measures how long it takes to complete a stride of work.
-     * @return A new or existing Timer
+     * @return a new or existing {@link Timer}
      */
     Timer getOrCreateStridesServiceTimer();
 
@@ -48,13 +50,13 @@ public interface ActivityInstrumentation {
      * time a stride should start to when it completed. Stride scheduling is only defined
      * when it is implied by a stride rate limiter, so this method should return null if
      * there is no strides rate limiter.
-     * @return A new or existing Timer if appropriate, else null
+     * @return a new or existing {@link Timer} if appropriate, else null
      */
     Timer getStridesResponseTimerOrNull();
 
     /**
      * The cycles service timer measures how long it takes to complete a cycle of work.
-     * @return A new or existing Timer
+     * @return a new or existing {@link Timer}
      */
     Timer getOrCreateCyclesServiceTimer();
 
@@ -63,13 +65,13 @@ public interface ActivityInstrumentation {
      * time an operation should start to when it is completed. Cycle scheduling is only defined
      * when it is implied by a cycle rate limiter, so this method should return null if
      * there is no cycles rate limiter.
-     * @return A new or existing Timer if appropriate, else null
+     * @return a new or existing {@link Timer} if appropriate, else null
      */
     Timer getCyclesResponseTimerOrNull();
 
     /**
      * The phases service timer measures how long it takes to complete a phase of work.
-     * @return A new or existing Timer
+     * @return a new or existing {@link Timer}
      */
     Timer getOrCreatePhasesServiceTimer();
 
@@ -78,26 +80,71 @@ public interface ActivityInstrumentation {
      * time a phase should start to when it is completed. Phase scheduling is only defined
      * when it is implied by a phase rate limiter, so this method should return null if
      * there is no phases rate limiter.
-     * @return A new or existing Timer if appropriate, else null
+     * @return a new or existing {@link Timer} if appropriate, else null
      */
     Timer getPhasesResponseTimerOrNull();
 
     /**
      * The pending ops counter keeps track of how many ops are submitted or in-flight, but
      * which haven't been completed yet.
-     * @return A new or existing Counter
+     * @return a new or existing {@link Counter}
      */
     Counter getOrCreatePendingOpCounter();
 
     Counter getOrCreateOpTrackerBlockedCounter();
 
+    /**
+     * The bind timer keeps track of how long it takes for NoSQLBench to create an instance
+     * of an executable operation, given the cycle. This is usually done by using an
+     * {@link io.nosqlbench.engine.api.activityapi.planning.OpSequence} in conjunction with
+     * an {@link io.nosqlbench.engine.api.activityimpl.OpDispenser}. This is named for "binding
+     * a cycle to an operation".
+     * @return a new or existing {@link Timer}
+     */
     Timer getOrCreateBindTimer();
 
+    /**
+     * The execute timer keeps track of how long it takes to submit an operation to be executed
+     * to an underlying native driver. For asynchronous APIs, such as those which return a
+     * {@link Future}, this is simply the amount of time it takes to acquire the future.
+     * <p>
+     * When possible, APIs should be used via their async methods, even if you are implementing
+     * a {@link SyncAction}. This allows the execute timer to measure the hand-off to the underlying API,
+     * and the result timer to measure the blocking calls to acquire the result.
+     * @return a new or existing {@link Timer}
+     */
     Timer getOrCreateExecuteTimer();
 
+    /**
+     * The result timer keeps track of how long it takes a native driver to service a request once submitted.
+     * This timer, in contrast to the result-success timer ({@link #getOrCreateResultSuccessTimer()}),
+     * is used to track all operations. That is, no matter
+     * whether the operation succeeds or not, it should be tracked with this timer. The scope of this timer should
+     * cover each attempt at an operation through a native driver. Retries are not to be combined in this measurement.
+     * @return a new or existing {@link Timer}
+     */
     Timer getOrCreateResultTimer();
 
+    /**
+     * The result-success timer keeps track of operations which had no exception. The measurements for this timer should
+     * be exactly the same values as used for the result timer ({@link #getOrCreateResultTimer()}), except that
+     * attempts to complete an operation which yield an exception should be excluded from the results. These two metrics
+     * together provide a very high level sanity check against the error-specific metrics which can be reported by
+     * the error handler logic.
+     * @return a new or existing {@link Timer}
+     */
     Timer getOrCreateResultSuccessTimer();
 
+    /**
+     * The tries histogram tracks how many tries it takes to complete an operation successfully, or not. This histogram
+     * does not encode whether operations were successful or not. Ideally, if every attempt to complete an operation succeeds
+     * on its first try, the data in this histogram should all be 1. In practice, systems which are running near their
+     * capacity will see a few retried operations, and systems that are substantially over-driven will see many retried
+     * operations. As the retries value increases the further down the percentile scale you go, you can detect system loading
+     * patterns which are in excess of the real-time capability of the target system.
+     * <p>
+     * This metric should be measured around every retry loop for a native operation.
+     * @return a new or existing {@link Histogram}
+     */
     Histogram getOrCreateTriesHistogram();
 }
diff --git a/engine-docs/src/main/resources/docs-for-nb/testing_practice/layers.puml b/engine-docs/src/main/resources/docs-for-nb/testing_practice/layers.puml
index 511694f48..5ee274654 100644
--- a/engine-docs/src/main/resources/docs-for-nb/testing_practice/layers.puml
+++ b/engine-docs/src/main/resources/docs-for-nb/testing_practice/layers.puml
@@ -2,18 +2,18 @@
 header Layers of Messaging
 hide footbox
 
-box "User View" #clear
+box "User View" #white
 actor Alice as user
 collections "User\nAgent" as agent
 collections "Client\nSide\nApp" as capp
 end box
 
-box "WebApp View" #clear
+box "WebApp View" #white
 boundary "App Server\nor node, ..." as sapp
 boundary "Database\nDriver" as driver
 end box
 
-box "DB View" #clear
+box "DB View" #white
 database DB as db
 boundary "Storage\nService" as store
 entity "Storage\nDevice" as device
@@ -50,4 +50,4 @@ user <- agent: browser\nrenders page
 deactivate agent
 
 
-@enduml
\ No newline at end of file
+@enduml
diff --git a/sort_docs/eb_metrics_full.puml b/sort_docs/eb_metrics_full.puml
new file mode 100644
index 000000000..3fca5d94e
--- /dev/null
+++ b/sort_docs/eb_metrics_full.puml
@@ -0,0 +1,70 @@
+@startuml
+
+Participant Input as i
+Participant Thread as t
+Participant Action as a
+Participant Activity as at
+Participant "Native\nDriver" as d
+
+== acquire input ==
+
+group TIMER read-input
+ t -> i : get segment(stride)
+ activate i
+ t <- i : <cycle segment>[stride]
+ deactivate i
+end
+
+  group BLOCK striderate
+  t -> t: block until\nunthrottled
+  end
+
+group TIMER strides
+
+  loop over cycle values in segment
+
+    group BLOCK cyclerate
+     t -> t: block until\nunthrottled
+    end
+
+    group TIMER cycle
+      t -> a : runCycle(cycle)
+      activate a
+
+      group TIMER bind
+        a -> at: bind
+        activate at
+
+        at -> d: ... [if needed]
+        at <- d: ...
+        a <- at: op
+        deactivate at
+      end
+
+      group HISTOGRAM tries
+        loop over tries
+
+          group TIMER execute
+            a --> d: execute
+            activate d
+            a <-- d: future
+            deactivate d
+          end
+
+          group TIMERS result\n& result-success[result-success\nwhen no error]
+            a -> d: get result
+            activate d
+            a <- d: result
+            deactivate d
+          end
+
+        end
+
+        t <- a : result
+        deactivate a
+      end
+    end
+  end
+end # strides
+
+@enduml