diff --git a/adapter-mongodb/src/main/resources/activities/baselinesv2/mongodb-keyvalue2.yaml b/adapter-mongodb/src/main/resources/activities/baselinesv2/mongodb-keyvalue2.yaml
index e0872c3ab..f32a23c78 100644
--- a/adapter-mongodb/src/main/resources/activities/baselinesv2/mongodb-keyvalue2.yaml
+++ b/adapter-mongodb/src/main/resources/activities/baselinesv2/mongodb-keyvalue2.yaml
@@ -1,3 +1,7 @@
+# Connection Guide: https://www.mongodb.com/docs/drivers/java/sync/current/fundamentals/connection/
+# nb5 run driver=mongodb workload=/path/to/mongodb-keyvalue2.yaml tags=block:schema connection='mongodb+srv://user:pass@sample-db.host.mongodb.net/?retryWrites=true&w=majority' database=baselines -vv --show-stacktraces
+# nb5 run driver=mongodb workload=/path/to/mongodb-keyvalue2.yaml tags=block:rampup cycles=25 connection='mongodb+srv://user:pass@sample-db.host.mongodb.net/?retryWrites=true&w=majority' database=baselines -vv --show-stacktraces
+# nb5 run driver=mongodb workload=/path/to/mongodb-keyvalue2.yaml tags='block:main-.*' cycles=25 connection='mongodb+srv://user:pass@sample-db.host.mongodb.net/?retryWrites=true&w=majority' database=baselines -vv --show-stacktraces
 min_version: "4.17.24"
 
 description: |
@@ -7,12 +11,11 @@ scenarios:
   default:
     schema: run driver=mongodb tags==block:schema threads==1 cycles==UNDEF
     rampup: run driver=mongodb tags==block:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto
-    main: run driver=mongodb tags==block:"main.*" cycles===TEMPLATE(main-cycles,10000000) threads=auto
-  astra:
-    schema: run driver=mongodb tags==block:schema-astra threads==1 cycles==UNDEF
-    rampup: run driver=mongodb tags==block:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto
-    main: run driver=mongodb tags==block:"main.*" cycles===TEMPLATE(main-cycles,10000000) threads=auto
+    main: run driver=mongodb tags==block:"main-.*" cycles===TEMPLATE(main-cycles,10000000) threads=auto
+    drop: run driver=mongodb tags==block:drop-collection threads==1 cycles==UNDEF
 
+params:
+  instrument: true
 bindings:
   seq_key: Mod(TEMPLATE(keycount,1000000000)); ToString();
   seq_value: >-
@@ -28,32 +31,95 @@ blocks:
     params:
       prepared: false
     ops:
+      # https://www.mongodb.com/docs/manual/reference/method/db.createCollection/
+      # https://www.mongodb.com/docs/manual/core/schema-validation/specify-json-schema/
+      # `clusteredIndex` only supports creation of an index on the `_id` field (as shown below), so it's optional
       create-collection: |
         {
-          ...
+          create: "TEMPLATE(collection,keyvalue)",
+          clusteredIndex: {
+            key: { "_id": 1 },
+            unique: true,
+            name: "_id_idx"
+          },
+          writeConcern: { w: "majority" },
+          validator: {
+            $jsonSchema: {
+              bsonType: "object",
+              title: "Key/Value collection schema validation",
+              required: [ "key" ],
+              properties: {
+                key: {
+                  bsonType: "string",
+                  description: "'key' must be a string and is required"
+                }
+              }
+            }
+          },
+          validationLevel: "strict",
+          validationAction: "error",
+          comment: "keyvalue collection creation with strict types and required 'key' field."
+        }
+      create-key-index: |
+        {
+          createIndexes: "TEMPLATE(collection,keyvalue)",
+          indexes: [
+            {
+              key: {
+                key: 1,
+              },
+              name: "kv_key_idx",
+              unique: true
+            }
+          ],
+          writeConcern: { w: "majority" },
+          comment: "'key' index creation for keyvalue collection. Values should be unique.",
+          commitQuorum: "majority"
         }
-# create keyspace if not exists TEMPLATE(keyspace,baselines)
-# WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 'TEMPLATE(rf,1)'}
-# AND durable_writes = true;
   rampup:
     ops:
       rampup-insert: |
         {
-          ...
+          insert: "TEMPLATE(collection,keyvalue)",
+          documents: [
+            {
+              key: "{rw_key}",
+              value: "{rw_value}"
+            }
+          ],
+          comment: "Insert documents into keyvalue collection."
         }
   main-read:
     params:
       ratio: 5
-    statements:
+    ops:
       main-select: |
         {
-          ...
+          find: "TEMPLATE(collection,keyvalue)",
+          filter: { key: { $eq: "{rw_key}" } },
+          readConcern: { level: "majority" },
+          comment: "Find the value for the given 'key'."
         }
   main-write:
     params:
       ratio: 5
-    statements:
+    ops:
       main-insert: |
         {
-          ...
+          insert: "TEMPLATE(collection,keyvalue)",
+          documents: [
+            {
+              key: "{rw_key}",
+              value: "{rw_value}"
+            }
+          ],
+          writeConcern: { w: "majority" },
+          comment: "Insert documents into keyvalue collection."
         }
+  drop-collection:
+    ops:
+      drop-collection: |
+        {
+          drop: "TEMPLATE(collection,keyvalue)",
+          comment: "Drop keyvalue collection to start afresh."
+        }
\ No newline at end of file
diff --git a/adapter-mongodb/src/main/resources/activities/baselinesv2/mongodb-tabular2.yaml b/adapter-mongodb/src/main/resources/activities/baselinesv2/mongodb-tabular2.yaml
new file mode 100644
index 000000000..5f8775898
--- /dev/null
+++ b/adapter-mongodb/src/main/resources/activities/baselinesv2/mongodb-tabular2.yaml
@@ -0,0 +1,280 @@
+# Connection Guide: https://www.mongodb.com/docs/drivers/java/sync/current/fundamentals/connection/
+# nb5 run driver=mongodb workload=/path/to/mongodb-tabular2.yaml tags=block:schema connection='mongodb+srv://user:pass@sample-db.host.mongodb.net/?retryWrites=true&w=majority' database=baselines -vv --show-stacktraces
+# nb5 run driver=mongodb workload=/path/to/mongodb-tabular2.yaml tags=block:rampup cycles=25 connection='mongodb+srv://user:pass@sample-db.host.mongodb.net/?retryWrites=true&w=majority' database=baselines -vv --show-stacktraces
+# nb5 run driver=mongodb workload=/path/to/mongodb-tabular2.yaml tags='block:main.*' cycles=25 connection='mongodb+srv://user:pass@sample-db.host.mongodb.net/?retryWrites=true&w=majority' database=baselines -vv --show-stacktraces
+min_version: "4.17.24"
+
+description: |
+  This workload is analogous to the cql-tabular2 workload, just implemented for MongoDB.
+
+scenarios:
+  default:
+    schema: run driver=mongodb tags==block:schema threads==1 cycles==UNDEF
+    rampup: run driver=mongodb tags==block:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto
+    main: run driver=mongodb tags==block:"main-.*" cycles===TEMPLATE(main-cycles,10000000) threads=auto
+    drop: run driver=mongodb tags==block:drop-collection threads==1 cycles==UNDEF
+
+params:
+  instrument: true
+bindings:
+
+  # for ramp-up and verify phases
+  #
+  part_layout: Div(TEMPLATE(partsize,1000)); ToString() -> String
+  clust_layout: Mod(TEMPLATE(partsize,1000)); ToString() -> String
+  # todo: update these definitions to use the simpler 10,0.1, 20, 0.2, ...
+  data0: Add(10); HashedFileExtractToString('data/lorem_ipsum_full.txt',9,11)
+  data1: Add(20); HashedFileExtractToString('data/lorem_ipsum_full.txt',18,22)
+  data2: Add(30); HashedFileExtractToString('data/lorem_ipsum_full.txt',27,33)
+  data3: Add(40); HashedFileExtractToString('data/lorem_ipsum_full.txt',45,55)
+  data4: Add(50); HashedFileExtractToString('data/lorem_ipsum_full.txt',72,88)
+  data5: Add(60); HashedFileExtractToString('data/lorem_ipsum_full.txt',107,143)
+  data6: Add(70); HashedFileExtractToString('data/lorem_ipsum_full.txt',189,231)
+  data7: Add(80); HashedFileExtractToString('data/lorem_ipsum_full.txt',306,374)
+
+  # for main phase
+  # for write
+  part_write: Hash(); Uniform(0,TEMPLATE(partcount,100))->int; ToString() -> String
+  clust_write: Hash(); Add(1); Uniform(0,TEMPLATE(partsize,1000000))->int; ToString() -> String
+  data_write: Hash(); HashedFileExtractToString('data/lorem_ipsum_full.txt',50,150) -> String
+
+  # for read
+  limit: Uniform(1,10) -> int
+  part_read: Uniform(0,TEMPLATE(partcount,100))->int; ToString() -> String
+  clust_read: Add(1); Uniform(0,TEMPLATE(partsize,1000000))->int; ToString() -> String
+
+blocks:
+  schema:
+    params:
+      prepared: false
+    ops:
+      # https://www.mongodb.com/docs/manual/reference/method/db.createCollection/
+      # https://www.mongodb.com/docs/manual/core/schema-validation/specify-json-schema/
+      # `clusteredIndex` only supports creation of an index on the `_id` field, so it's optional and is not used here
+      create-collection: |
+        {
+          create: "TEMPLATE(collection,tabular)",
+          writeConcern: { w: "majority" },
+          validator: {
+            $jsonSchema: {
+              bsonType: "object",
+              title: "Tabular collection schema validation",
+              required: [ "part", "clust" ],
+              properties: {
+                part: {
+                  bsonType: "string",
+                  description: "'part' must be a string, unique and is required"
+                },
+                clust: {
+                  bsonType: "string",
+                  description: "'clust' must be a string, unique and is required"
+                },
+                data0: {
+                  bsonType: "string",
+                  description: "'data0' must be a string"
+                },
+                data1: {
+                  bsonType: "string",
+                  description: "'data1' must be a string"
+                },
+                data2: {
+                  bsonType: "string",
+                  description: "'data2' must be a string"
+                },
+                data3: {
+                  bsonType: "string",
+                  description: "'data3' must be a string"
+                },
+                data4: {
+                  bsonType: "string",
+                  description: "'data4' must be a string"
+                },
+                data5: {
+                  bsonType: "string",
+                  description: "'data5' must be a string"
+                },
+                data6: {
+                  bsonType: "string",
+                  description: "'data6' must be a string"
+                },
+                data7: {
+                  bsonType: "string",
+                  description: "'data7' must be a string"
+                }
+              }
+            }
+          },
+          validationLevel: "strict",
+          validationAction: "error",
+          comment: "tabular-like collection creation with strict types and required 'part' field."
+        }
+      create-part-clust-index: |
+        {
+          createIndexes: "TEMPLATE(collection,tabular)",
+          indexes: [
+            {
+              key: {
+                part: 1,
+              },
+              name: "tab_part_idx",
+              unique: false
+            },
+            {
+              key: {
+                clust: 1,
+              },
+              name: "tab_clust_idx",
+              unique: true
+            }
+          ],
+          writeConcern: { w: "majority" },
+          comment: "'part' & 'clust' index creation for tabular collection. Values should be unique.",
+          commitQuorum: "majority"
+        }
+  rampup:
+    ops:
+      rampup-insert: |
+        {
+          insert: "TEMPLATE(collection,tabular)",
+          documents: [
+            {
+              part: "{part_write}",
+              clust: "{clust_write}",
+              data0: "{data0}",
+              data1: "{data1}",
+              data2: "{data2}",
+              data3: "{data3}",
+              data4: "{data4}",
+              data5: "{data5}",
+              data6: "{data6}",
+              data7: "{data7}"
+            }
+          ],
+          writeConcern: { w: "majority" },
+          comment: "Insert documents into tabular collection."
+        }
+  main-read:
+    params:
+      ratio: TEMPLATE(read_ratio,1)
+    ops:
+      main-select-all:
+        statement: |
+          {
+            find: "TEMPLATE(collection,tabular)",
+            filter: { part: { $eq: "{part_read}" } },
+            limit: {limit},
+            readConcern: { level: "majority" },
+            comment: "Find the value for the given 'part'."
+          }
+        ratio: TEMPLATE(read_ratio,1)
+      main-select-01:
+        statement: |
+          {
+            find: "TEMPLATE(collection,tabular)",
+            filter: { part: { $eq: "{part_read}" } },
+            projection: { data0: 1, data1: 1 },
+            limit: {limit},
+            readConcern: { level: "majority" },
+            comment: "Find the data01 value for the given 'part'."
+          }
+        ratio: TEMPLATE(read_ratio,1)
+      main-select-0246:
+        statement: |
+          {
+            find: "TEMPLATE(collection,tabular)",
+            filter: { part: { $eq: "{part_read}" } },
+            projection: { data0: 1, data2: 1, data4: 1, data6: 1 },
+            limit: {limit},
+            readConcern: { level: "majority" },
+            comment: "Find the data0246 value for the given 'part'."
+          }
+        ratio: TEMPLATE(read_ratio,1)
+      main-select-1357:
+        statement: |
+          {
+            find: "TEMPLATE(collection,tabular)",
+            filter: { part: { $eq: "{part_read}" } },
+            projection: { data1: 1, data3: 1, data5: 1, data7: 1 },
+            limit: {limit},
+            readConcern: { level: "majority" },
+            comment: "Find the data1357 value for the given 'part'."
+          }
+        ratio: TEMPLATE(read_ratio,1)
+      main-select-0123:
+        statement: |
+          {
+            find: "TEMPLATE(collection,tabular)",
+            filter: { part: { $eq: "{part_read}" } },
+            projection: { data0: 1, data1: 1, data2: 1, data3: 1 },
+            limit: {limit},
+            readConcern: { level: "majority" },
+            comment: "Find the data0123 value for the given 'part'."
+          }
+        ratio: TEMPLATE(read_ratio,1)
+      main-select-4567:
+        statement: |
+          {
+            find: "TEMPLATE(collection,tabular)",
+            filter: { part: { $eq: "{part_read}" } },
+            projection: { data4: 1, data5: 1, data6: 1, data7: 1 },
+            limit: {limit},
+            readConcern: { level: "majority" },
+            comment: "Find the data4567 value for the given 'part'."
+          }
+        ratio: TEMPLATE(read_ratio,1)
+      main-select-67:
+        statement: |
+          {
+            find: "TEMPLATE(collection,tabular)",
+            filter: { part: { $eq: "{part_read}" } },
+            projection: { data6: 1, data7: 1 },
+            limit: {limit},
+            readConcern: { level: "majority" },
+            comment: "Find the data67 value for the given 'part'."
+          }
+        ratio: TEMPLATE(read_ratio,1)
+      main-select:
+        statement: |
+          {
+            find: "TEMPLATE(collection,tabular)",
+            filter: { part: { $eq: "{part_read}" } },
+            projection: { data0: 1, data1: 1, data2: 1, data3: 1, data4: 1, data5: 1, data6: 1, data7: 1 },
+            limit: {limit},
+            readConcern: { level: "majority" },
+            comment: "Find the data01234567 value for the given 'part'."
+          }
+        ratio: TEMPLATE(read_ratio,1)
+  main-write:
+    params:
+      ratio: TEMPLATE(write_ratio,8)
+    ops:
+      main-insert:
+        statement: |
+          {
+            insert: "TEMPLATE(collection,tabular)",
+            documents: [
+              {
+                part: "{part_write}",
+                clust: "{clust_write}",
+                data0: "{data0}",
+                data1: "{data1}",
+                data2: "{data2}",
+                data3: "{data3}",
+                data4: "{data4}",
+                data5: "{data5}",
+                data6: "{data6}",
+                data7: "{data7}"
+              }
+            ],
+            comment: "Insert documents into tabular collection."
+          }
+        ratio: TEMPLATE(write_ratio,8)
+
+  # The drop-collection block below expects the collection to exist, or else it will fail
+  drop-collection:
+    ops:
+      drop-collection: |
+        {
+          drop: "TEMPLATE(collection,tabular)",
+          comment: "Drop tabular collection to start afresh."
+        }
diff --git a/adapter-mongodb/src/main/resources/activities/baselinesv2/mongodb-timeseries2.yaml b/adapter-mongodb/src/main/resources/activities/baselinesv2/mongodb-timeseries2.yaml
new file mode 100644
index 000000000..bae1b47d3
--- /dev/null
+++ b/adapter-mongodb/src/main/resources/activities/baselinesv2/mongodb-timeseries2.yaml
@@ -0,0 +1,171 @@
+# Connection Guide: https://www.mongodb.com/docs/drivers/java/sync/current/fundamentals/connection/
+# nb5 run driver=mongodb workload=/path/to/mongodb-timeseries2.yaml tags=block:schema connection='mongodb+srv://user:pass@sample-db.host.mongodb.net/?retryWrites=true&w=majority' database=baselines -vv --show-stacktraces
+# nb5 run driver=mongodb workload=/path/to/mongodb-timeseries2.yaml tags=block:rampup cycles=25 connection='mongodb+srv://user:pass@sample-db.host.mongodb.net/?retryWrites=true&w=majority' database=baselines -vv --show-stacktraces
+# nb5 run driver=mongodb workload=/path/to/mongodb-timeseries2.yaml tags='block:main.*' cycles=25 connection='mongodb+srv://user:pass@sample-db.host.mongodb.net/?retryWrites=true&w=majority' database=baselines -vv --show-stacktraces
+min_version: "4.17.24"
+
+description: |
+  This workload is analogous to the cql-timeseries2 workload, just implemented for MongoDB.
+
+scenarios:
+  default:
+    schema: run driver=mongodb tags==block:schema threads==1 cycles==UNDEF
+    rampup: run driver=mongodb tags==block:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto
+    main: run driver=mongodb tags==block:"main-.*" cycles===TEMPLATE(main-cycles,10000000) threads=auto
+    drop: run driver=mongodb tags==block:drop-collection threads==1 cycles==UNDEF
+
+params:
+  instrument: true
+
+bindings:
+  machine_id: Mod(TEMPLATE(sources,10000)); ToHashedUUID() -> java.util.UUID
+  sensor_name: HashedLineToString('data/variable_words.txt')
+  time: Mod(TEMPLATE(maxtimes,1000000000000L)); Mul(TEMPLATE(timespeed,100)L); Div(TEMPLATE(sources,10000)L); ToDate()
+  cell_timestamp: Mul(TEMPLATE(timespeed,100)L); Div(TEMPLATE(sources,10000)L); Mul(1000L)
+  sensor_value: Normal(0.0,5.0); Add(100.0) -> double
+  station_id: Div(TEMPLATE(sources,10000)); Mod(TEMPLATE(stations,1000)); ToHashedUUID() -> java.util.UUID
+  data: HashedFileExtractToString('data/lorem_ipsum_full.txt',800,1200)
+
+blocks:
+  schema:
+    params:
+      prepared: false
+    ops:
+      # https://www.mongodb.com/docs/manual/reference/method/db.createCollection/
+      # https://www.mongodb.com/docs/manual/core/schema-validation/specify-json-schema/
+      # https://www.mongodb.com/docs/manual/core/timeseries-collections/
+      # `clusteredIndex` only supports creation of an index on the `_id` field, so it's optional and is not used here
+      create-collection: |
+        {
+          create: "TEMPLATE(collection,timeseries)",
+          timeseries: {
+            timeField: "time",
+            granularity: "seconds"
+          },
+          writeConcern: { w: "majority" },
+          validator: {
+            $jsonSchema: {
+              bsonType: "object",
+              title: "Timeseries collection schema validation",
+              required: [ "machine_id", "sensor_name", "time" ],
+              properties: {
+                machine_id: {
+                  bsonType: "binData",
+                  description: "'machine_id' must be UUID, unique and is required"
+                },
+                sensor_name: {
+                  bsonType: "string",
+                  description: "'sensor_name' must be a string, unique and is required"
+                },
+                time: {
+                  bsonType: "timestamp",
+                  description: "'time' must be a timestamp, and is required"
+                },
+                sensor_value: {
+                  bsonType: "double",
+                  description: "'sensor_value' must be a double"
+                },
+                station_id: {
+                  bsonType: "binData",
+                  description: "'station_id' must be a UUID"
+                },
+                data: {
+                  bsonType: "string",
+                  description: "'data' must be a string"
+                }
+              }
+            }
+          },
+          validationLevel: "strict",
+          validationAction: "error",
+          comment: "timeseries-like collection creation with strict types and required '(machine_id, sensor_name), time' field."
+        }
+      create-part-clust-index: |
+        {
+          createIndexes: "TEMPLATE(collection,timeseries)",
+          indexes: [
+            {
+              key: {
+                machine_id: 1,
+                sensor_name: 1
+              },
+              name: "ts_machine_id_sensor_name_idx",
+              unique: true
+            },
+            {
+              key: {
+                time: -1,
+              },
+              name: "tab_time_desc_idx",
+              unique: true
+            }
+          ],
+          writeConcern: { w: "majority" },
+          comment: "'(machine_id, sensor_name), time' index creation for timeseries collection. Values should be unique. 'time' field is sorted in DESC order.",
+          commitQuorum: "majority"
+        }
+      # UUID https://www.mongodb.com/docs/manual/reference/bson-types/#binary-data
+      # Example: https://www.tutorialspoint.com/query-bindata-by-type-in-mongodb
+  rampup:
+    ops:
+      rampup-insert: |
+        {
+          insert: "TEMPLATE(collection,timeseries)",
+          documents: [
+            {
+              machine_id: "{machine_id}",
+              sensor_name: "{sensor_name}",
+              time: BinData(4, "{time}"),
+              sensor_value: "{sensor_value}",
+              station_id: "{station_id}",
+              data: "{data}"
+            }
+          ],
+          writeConcern: { w: "majority" },
+          comment: "Insert documents into timeseries collection."
+        }
+  main-read:
+    params:
+      ratio: TEMPLATE(read_ratio,1)
+    ops:
+      select-read:
+        statement: |
+          {
+            find: "TEMPLATE(collection,timeseries)",
+            filter: { machine_id: { $eq: "{machine_id}" }, sensor_name: { $eq: "{sensor_name}" } },
+            limit: TEMPLATE(limit,10),
+            readConcern: { level: "majority" },
+            comment: "Find the value for the given 'machine_id' and 'sensor_name'."
+          }
+        ratio: TEMPLATE(read_ratio,1)
+  main-write:
+    params:
+      ratio: TEMPLATE(write_ratio,9)
+    ops:
+      main-insert:
+        statement: |
+          {
+            insert: "TEMPLATE(collection,timeseries)",
+            documents: [
+              {
+                machine_id: "{machine_id}",
+                sensor_name: "{sensor_name}",
+                time: BinData(4, "{time}"),
+                sensor_value: "{sensor_value}",
+                station_id: "{station_id}",
+                data: "{data}"
+              }
+            ],
+            writeConcern: { w: "majority" },
+            comment: "Insert documents into timeseries collection."
+          }
+        ratio: TEMPLATE(write_ratio,9)
+
+  # The drop-collection block below expects the collection to exist, or else it will fail
+  drop-collection:
+    ops:
+      drop-collection: |
+        {
+          drop: "TEMPLATE(collection,timeseries)",
+          comment: "Drop timeseries collection to start afresh."
+        }
\ No newline at end of file