Enhance pulsar and kafka adapters documentation

This commit is contained in:
Massimiliano Mirelli 2023-03-07 14:15:21 +02:00
parent e7730ae281
commit 7d5ecf2f32
4 changed files with 29 additions and 3 deletions

View File

@@ -39,3 +39,24 @@ $ <nb_cmd> run driver=kafka -vv cycles=100 threads=4 num_clnt=2 num_cons_grp=2 y
* `num_cons_grp`: the number of consumer groups
* Only relevant for consumer workload
For the Kafka NB adapter, document-level parameters can only be statically bound. Currently, the following document-level configuration parameters are supported:
* `async_api` (boolean):
  * When true, the asynchronous Kafka client API is used.
* `seq_tracking` (boolean):
  * When true, a sequence number is added to each message's properties.
  * This parameter is used together with `seqerr_simu` in order to simulate abnormal message processing errors and then verify that such errors are detected successfully.
* `seqerr_simu`:
  * A comma-separated list of error simulation types.
  * Valid error simulation types:
    * `out_of_order`: simulate out-of-sequence message delivery
    * `msg_loss`: simulate message loss
    * `msg_dup`: simulate message duplication
  * This parameter should be used only for verifying error detection; it is not recommended in actual performance-testing environments.
* `e2e_starting_time_source`:
  * Starting timestamp for the end-to-end operation. When specified, the adapter updates the `e2e_msg_latency` histogram with the calculated end-to-end latency, computed by subtracting the starting time from the current time. The starting time is taken from the configured source and is expressed in milliseconds since epoch.
  * The possible values for `e2e_starting_time_source`:
    * `message_publish_time`: uses the message publishing timestamp as the starting time. In this case, the publishing time [is computed by the Kafka client on record creation](https://kafka.apache.org/34/javadoc/org/apache/kafka/clients/producer/ProducerRecord.html), since [`CreateTime` is the default](https://docs.confluent.io/platform/current/installation/configuration/topic-configs.html#message-timestamp-type).
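Taken together, these document-level parameters go in the `params` block of the workload YAML. A minimal sketch (the values below are illustrative, not defaults):

```yaml
# Document-level parameters for the Kafka NB adapter (illustrative values)
params:
  # use the asynchronous Kafka client API
  async_api: "true"
  # attach a sequence number to each message so processing errors can be detected
  seq_tracking: "true"
  # error injection for testing error detection only; remove in real test runs
  seqerr_simu: "out_of_order,msg_loss,msg_dup"
  # derive end-to-end latency from the message publishing timestamp
  e2e_starting_time_source: "message_publish_time"
```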

View File

@@ -1,9 +1,11 @@
# document level parameters that apply to all Pulsar client types:
# document level parameters that apply to all Kafka client types:
params:
# Whether to commit messages asynchronously
# - default: true
# - only relevant for manual commit
# async_api: "true"
# activates e2e latency metrics
# - default: "none" (i.e. disabled)
e2e_starting_time_source: "message_publish_time"
# activates e2e error metrics (message duplication, message loss and out-of-order detection)
# it needs to be enabled both on the producer and the consumer
@@ -16,7 +18,7 @@ blocks:
op1:
## The value represents the topic names
# - for consumer, a comma-separated list of topics is supported
MessageConsume: "nbktest1,nbktest2"
MessageConsume: "nbktest"
# The timeout value for polling messages (unit: milliseconds)
# - default: 0

View File

@@ -13,6 +13,8 @@ params:
# it needs to be enabled both on the producer and the consumer
# - default: false
seq_tracking: "true"
# test error injection, remove in production
seqerr_simu: 'out_of_order,msg_loss,msg_dup'
blocks:
msg-produce-block:
@@ -26,7 +28,7 @@ blocks:
# - default: 0
# - value 0 or 1 means no transaction
# - it also requires "transactional.id" parameter is set
txn_batch_num: 8
txn_batch_num: 1
## (Optional) Kafka message headers (in JSON format).
msg_header: |

View File

@@ -155,6 +155,7 @@ For the Pulsar NB driver, document-level parameters can only be statically bound
    * `out_of_order`: simulate out-of-sequence message delivery
    * `msg_loss`: simulate message loss
    * `msg_dup`: simulate message duplication
  * This parameter should be used only for verifying error detection; it is not recommended in actual performance-testing environments.
* ***e2e_starting_time_source***:
  * Starting timestamp for the end-to-end operation. When specified, the driver updates the `e2e_msg_latency` histogram with the calculated end-to-end latency, computed by subtracting the starting time from the current time. The starting time is taken from the configured source and is expressed in milliseconds since epoch.
  * The possible values for `e2e_starting_time_source`:
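The end-to-end latency computation described above can be sketched as follows (the function name is illustrative, not part of the driver; both timestamps are milliseconds since epoch):

```python
import time
from typing import Optional

def e2e_latency_ms(starting_time_ms: int, now_ms: Optional[int] = None) -> int:
    """End-to-end latency: current time minus the configured starting time.

    `starting_time_ms` comes from the configured starting time source
    (e.g. the message publishing timestamp); both values are in
    milliseconds since epoch.
    """
    if now_ms is None:
        now_ms = int(time.time() * 1000)
    return now_ms - starting_time_ms
```

Each consumed message would contribute one such sample to the `e2e_msg_latency` histogram.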