Updated grammar to support UDTs and non-reserved keywords as identifiers

This commit is contained in:
Pawan Kumar 2024-04-22 19:24:54 +05:30
parent dead305986
commit 07ae3b8636
4 changed files with 173 additions and 27 deletions

View File

@ -6,26 +6,26 @@ options {
// Operators and Punctuators
LR_BRACKET: '(';
LR_BRACKET: '(' -> pushMode(IDENTIFIER_MODE);
RR_BRACKET: ')';
LC_BRACKET: '{';
LC_BRACKET: '{' -> pushMode(IDENTIFIER_MODE);
RC_BRACKET: '}';
LS_BRACKET: '[';
LS_BRACKET: '[' -> pushMode(IDENTIFIER_MODE);
RS_BRACKET: ']';
COMMA: ',';
COMMA: ',' -> pushMode(IDENTIFIER_MODE);
SEMI: ';';
COLON: ':';
DOT: '.';
DOT: '.' -> pushMode(IDENTIFIER_MODE);
STAR: '*';
DIVIDE: '/';
MODULE: '%';
PLUS: '+';
PLUS: '+' -> pushMode(IDENTIFIER_MODE);
MINUSMINUS: '--';
MINUS: '-';
DQUOTE: '"';
SQUOTE: '\'';
OPERATOR_EQ: '=';
OPERATOR_LT: '<';
OPERATOR_EQ: '=' -> pushMode(IDENTIFIER_MODE);
OPERATOR_LT: '<' -> pushMode(IDENTIFIER_MODE);
OPERATOR_GT: '>';
OPERATOR_LTE: '<=';
OPERATOR_GTE: '>=';
@ -37,7 +37,7 @@ K_AGGREGATE: 'AGGREGATE';
K_ALL: 'ALL';
K_ALLOW: 'ALLOW';
K_ALTER: 'ALTER';
K_AND: 'AND';
K_AND: 'AND' -> pushMode(IDENTIFIER_MODE);
K_ANY: 'ANY';
K_APPLY: 'APPLY';
K_AS: 'AS';
@ -63,7 +63,7 @@ K_DURABLE_WRITES: 'DURABLE_WRITES';
K_EACH_QUORUM: 'EACH_QUORUM';
K_ENTRIES: 'ENTRIES';
K_EXECUTE: 'EXECUTE';
K_EXISTS: 'EXISTS';
K_EXISTS: 'EXISTS' -> pushMode(IDENTIFIER_MODE);
K_FALSE: 'FALSE';
K_FILTERING: 'FILTERING';
K_FINALFUNC: 'FINALFUNC';
@ -84,7 +84,7 @@ K_IS: 'IS';
K_JSON: 'JSON';
K_KEY: 'KEY';
K_KEYS: 'KEYS';
K_KEYSPACE: 'KEYSPACE';
K_KEYSPACE: 'KEYSPACE' -> pushMode(IDENTIFIER_MODE);
K_KEYSPACES: 'KEYSPACES';
K_LANGUAGE: 'LANGUAGE';
// Disabled because there was no definitive reference to this as a bare keyword in the specs
@ -101,8 +101,8 @@ K_NORECURSIVE: 'NORECURSIVE';
K_NOSUPERUSER: 'NOSUPERUSER';
K_NOT: 'NOT';
K_NULL: 'NULL';
K_OF: 'OF';
K_ON: 'ON';
K_OF: 'OF' -> pushMode(IDENTIFIER_MODE);
K_ON: 'ON' -> pushMode(IDENTIFIER_MODE);
K_ONE: 'ONE';
K_OPTIONS: 'OPTIONS';
K_OR: 'OR';
@ -114,7 +114,7 @@ K_PERMISSION: 'PERMISSION';
K_PERMISSIONS: 'PERMISSIONS';
K_PRIMARY: 'PRIMARY';
K_QUORUM: 'QUORUM';
K_RENAME: 'RENAME';
K_RENAME: 'RENAME' -> pushMode(IDENTIFIER_MODE);
K_REPLACE: 'REPLACE';
K_REPLICATION: 'REPLICATION';
K_RETURNS: 'RETURNS';
@ -123,32 +123,32 @@ K_ROLE: 'ROLE';
K_ROLES: 'ROLES';
K_SCHEMA: 'SCHEMA';
K_SELECT: 'SELECT';
K_SET: 'SET';
K_SET: 'SET' -> pushMode(IDENTIFIER_MODE);
K_SFUNC: 'SFUNC';
K_STATIC: 'STATIC';
K_STORAGE: 'STORAGE';
K_STYPE: 'STYPE';
K_SUPERUSER: 'SUPERUSER';
K_TABLE: 'TABLE';
K_TABLE: 'TABLE' -> pushMode(IDENTIFIER_MODE);
K_THREE: 'THREE';
K_TIMESTAMP: 'TIMESTAMP';
K_TO: 'TO';
K_TO: 'TO' -> pushMode(IDENTIFIER_MODE);
K_TOKEN: 'TOKEN';
K_TRIGGER: 'TRIGGER';
K_TRUE: 'TRUE';
K_TRUNCATE: 'TRUNCATE';
K_TRUNCATE: 'TRUNCATE' -> pushMode(IDENTIFIER_MODE);
K_TTL: 'TTL';
K_TWO: 'TWO';
K_TYPE: 'TYPE';
K_TYPE: 'TYPE' -> pushMode(IDENTIFIER_MODE);
K_UNLOGGED: 'UNLOGGED';
K_UPDATE: 'UPDATE';
K_USE: 'USE';
K_UPDATE: 'UPDATE' -> pushMode(IDENTIFIER_MODE);
K_USE: 'USE' -> pushMode(IDENTIFIER_MODE);
K_USER: 'USER';
K_USING: 'USING';
K_UUID: 'UUID';
K_VALUES: 'VALUES';
K_VIEW: 'VIEW';
K_WHERE: 'WHERE';
K_WHERE: 'WHERE' -> pushMode(IDENTIFIER_MODE);
K_WITH: 'WITH';
K_WRITETIME: 'WRITETIME';
K_ASCII: 'ASCII';
@ -221,5 +221,92 @@ fragment DEC_DIGIT: [0-9];
fragment EXPONENT_NUM_PART: 'E' ('-'|'+') ? DEC_DIGIT+;
// Lexer mode entered through the pushMode(IDENTIFIER_MODE) commands attached to
// rules outside this mode. Each rule here matches by referencing a rule declared
// outside the mode and re-labels the token with that rule's type via type(...),
// so the underscore-suffixed names are only lexer-internal aliases.
mode IDENTIFIER_MODE;
// ';' is emitted as SEMI and leaves the mode.
SEMI_: SEMI -> type(SEMI), popMode;
// Opening brackets and '<' are re-typed but carry no popMode, so the lexer
// remains in this mode after them.
LC_BRACKET_: LC_BRACKET -> type(LC_BRACKET);
LS_BRACKET_: LS_BRACKET -> type(LS_BRACKET);
LR_BRACKET_: LR_BRACKET -> type(LR_BRACKET);
OPERATOR_LT_: OPERATOR_LT -> type(OPERATOR_LT);
// Closing brackets and '>' are re-typed and pop the mode.
RR_BRACKET_: RR_BRACKET -> type(RR_BRACKET), popMode;
RC_BRACKET_: RC_BRACKET -> type(RC_BRACKET), popMode;
RS_BRACKET_: RS_BRACKET -> type(RS_BRACKET), popMode;
OPERATOR_GT_: OPERATOR_GT -> type(OPERATOR_GT), popMode;
// Reserved Keywords which cannot be used as identifiers
// https://cassandra.apache.org/doc/4.1/cassandra/cql/appendices.html#appendix-A
//
// Each rule below re-emits the keyword with its own K_* token type and pops the
// mode. The rules are listed ahead of OBJECT_NAME_ in this mode, presumably so
// that ANTLR's first-rule-wins tie-break on equal-length matches keeps the
// keyword type for these words; keywords without an entry here are left to the
// later rules of the mode.
K_ADD_: K_ADD -> type(K_ADD), popMode;
K_AGGREGATE_: K_AGGREGATE -> type(K_AGGREGATE), popMode;
K_ALLOW_: K_ALLOW -> type(K_ALLOW), popMode;
K_ALTER_: K_ALTER -> type(K_ALTER), popMode;
K_AND_: K_AND -> type(K_AND), popMode;
K_ANY_: K_ANY -> type(K_ANY), popMode;
K_APPLY_: K_APPLY -> type(K_APPLY), popMode;
K_ASC_: K_ASC -> type(K_ASC), popMode;
K_AUTHORIZE_: K_AUTHORIZE -> type(K_AUTHORIZE), popMode;
K_BATCH_: K_BATCH -> type(K_BATCH), popMode;
K_BEGIN_: K_BEGIN -> type(K_BEGIN), popMode;
K_BY_: K_BY -> type(K_BY), popMode;
K_COLUMNFAMILY_: K_COLUMNFAMILY -> type(K_COLUMNFAMILY), popMode;
K_CREATE_: K_CREATE -> type(K_CREATE), popMode;
K_DELETE_: K_DELETE -> type(K_DELETE), popMode;
K_DESC_: K_DESC -> type(K_DESC), popMode;
K_DROP_: K_DROP -> type(K_DROP), popMode;
K_DURABLE_WRITES_: K_DURABLE_WRITES -> type(K_DURABLE_WRITES), popMode;
K_ENTRIES_: K_ENTRIES -> type(K_ENTRIES), popMode;
K_FALSE_: K_FALSE -> type(K_FALSE), popMode;
K_FROM_: K_FROM -> type(K_FROM), popMode;
K_FULL_: K_FULL -> type(K_FULL), popMode;
K_GRANT_: K_GRANT -> type(K_GRANT), popMode;
K_IF_: K_IF -> type(K_IF), popMode;
K_IN_: K_IN -> type(K_IN), popMode;
K_INDEX_: K_INDEX -> type(K_INDEX), popMode;
K_INFINITY_: K_INFINITY -> type(K_INFINITY), popMode;
K_INSERT_: K_INSERT -> type(K_INSERT), popMode;
K_INTO_: K_INTO -> type(K_INTO), popMode;
// No K_IS_ rule is active in this mode: the entry below is commented out and is
// written in the literal form ('IS') rather than the K_X_ -> type(K_X) form.
// NOTE(review): confirm whether IS should be treated as reserved here.
//K_IS: 'IS';
K_KEYSPACE_: K_KEYSPACE -> type(K_KEYSPACE), popMode;
K_LIMIT_: K_LIMIT -> type(K_LIMIT), popMode;
K_LOGGED_: K_LOGGED -> type(K_LOGGED), popMode;
K_MODIFY_: K_MODIFY -> type(K_MODIFY), popMode;
K_NAN_: K_NAN -> type(K_NAN), popMode;
K_NORECURSIVE_: K_NORECURSIVE -> type(K_NORECURSIVE), popMode;
K_NOT_: K_NOT -> type(K_NOT), popMode;
K_NULL_: K_NULL -> type(K_NULL), popMode;
K_OF_: K_OF -> type(K_OF), popMode;
K_ON_: K_ON -> type(K_ON), popMode;
K_OR_: K_OR -> type(K_OR), popMode;
K_ORDER_: K_ORDER -> type(K_ORDER), popMode;
K_PRIMARY_: K_PRIMARY -> type(K_PRIMARY), popMode;
K_RENAME_: K_RENAME -> type(K_RENAME), popMode;
K_REPLACE_: K_REPLACE -> type(K_REPLACE), popMode;
K_REVOKE_: K_REVOKE -> type(K_REVOKE), popMode;
K_SCHEMA_: K_SCHEMA -> type(K_SCHEMA), popMode;
K_SELECT_: K_SELECT -> type(K_SELECT), popMode;
K_SET_: K_SET -> type(K_SET), popMode;
K_TABLE_: K_TABLE -> type(K_TABLE), popMode;
K_TO_: K_TO -> type(K_TO), popMode;
K_TOKEN_: K_TOKEN -> type(K_TOKEN), popMode;
K_TRUNCATE_: K_TRUNCATE -> type(K_TRUNCATE), popMode;
K_UNLOGGED_: K_UNLOGGED -> type(K_UNLOGGED), popMode;
K_UPDATE_: K_UPDATE -> type(K_UPDATE), popMode;
K_USE_: K_USE -> type(K_USE), popMode;
K_USING_: K_USING -> type(K_USING), popMode;
K_WHERE_: K_WHERE -> type(K_WHERE), popMode;
K_WITH_: K_WITH -> type(K_WITH), popMode;
// Identifier: emitted as OBJECT_NAME, then the mode is popped.
OBJECT_NAME_ : OBJECT_NAME -> type(OBJECT_NAME), popMode;
// Whitespace goes to the HIDDEN channel and has no popMode, so it does not end
// the mode.
SPACE_: [ \t\r\n]+ -> channel (HIDDEN);
// UUID values: emitted as UUID, then the mode is popped.
// NOTE(review): this rule is listed after OBJECT_NAME_; confirm the ordering is
// intended for inputs that both rules could match at the same length.
UUID_: UUID -> type(UUID), popMode;
// Literals
// Each literal is re-emitted with its own literal token type and pops the mode.
CODE_BLOCK_: CODE_BLOCK -> type(CODE_BLOCK), popMode;
STRING_LITERAL_: STRING_LITERAL -> type(STRING_LITERAL), popMode;
DECIMAL_LITERAL_: DECIMAL_LITERAL -> type(DECIMAL_LITERAL), popMode;
FLOAT_LITERAL_: FLOAT_LITERAL -> type(FLOAT_LITERAL), popMode;
HEXADECIMAL_LITERAL_: HEXADECIMAL_LITERAL -> type(HEXADECIMAL_LITERAL), popMode;
REAL_LITERAL_ : REAL_LITERAL -> type(REAL_LITERAL), popMode;

View File

@ -136,7 +136,7 @@ createType
;
typeMemberColumnList
: column dataType (syntaxComma column dataType)*
: column dataType (syntaxComma column dataType)* syntaxComma?
;
createTrigger
@ -244,8 +244,8 @@ alterType
;
alterTypeOperation
: alterTypeAlterType
| alterTypeAdd
// : alterTypeAlterType
: alterTypeAdd
| alterTypeRename
;
@ -262,7 +262,7 @@ alterTypeRenameItem
;
alterTypeAdd
: kwAdd column dataType (syntaxComma column dataType)*
: kwAdd column dataType
;
alterTypeAlterType
@ -767,6 +767,7 @@ stringLiteral
booleanLiteral
: K_TRUE
| K_FALSE
| OBJECT_NAME
;
hexadecimalLiteral
@ -790,7 +791,7 @@ column
dataType
: dataTypeName
| K_FROZEN syntaxBracketLa dataType syntaxBracketRa
| kwFrozen syntaxBracketLa dataType syntaxBracketRa
| K_SET syntaxBracketLa dataType syntaxBracketRa
| K_LIST syntaxBracketLa dataType syntaxBracketRa
| K_MAP syntaxBracketLa dataType syntaxComma dataType syntaxBracketRa
@ -901,6 +902,7 @@ kwAggregate
kwAll
: K_ALL
| OBJECT_NAME
;
kwAllPermissions
@ -915,6 +917,11 @@ kwAlter
: K_ALTER
;
kwFrozen
: K_FROZEN
| OBJECT_NAME
;
kwAnd
: K_AND
;
@ -1101,6 +1108,7 @@ kwLogged
kwLogin
: K_LOGIN
| OBJECT_NAME
;
kwMaterialized
@ -1209,6 +1217,7 @@ kwTable
kwTimestamp
: K_TIMESTAMP
| OBJECT_NAME
;
kwTo

View File

@ -21,6 +21,12 @@ import io.nosqlbench.cqlgen.parser.CqlModelParser;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Objects;
public class CqlParserHarnessTest {
private final static String ksddl = """
@ -81,5 +87,22 @@ public class CqlParserHarnessTest {
""", null);
}
/**
 * Feeds every regular file found directly under
 * {@code src/test/resources/cql3_examples} to {@link CqlModelParser#parse}.
 *
 * <p>Subdirectories are skipped: reading a directory with
 * {@link Files#readString(Path)} would raise an {@link IOException} unrelated
 * to the grammar under test. The second argument passed to the parser is
 * {@code null}, as in the other tests of this class.
 *
 * @throws IOException if an example file cannot be read
 * @throws NullPointerException if the examples folder does not exist or cannot be listed
 */
@Test
@Disabled
public void testCqlExamples() throws IOException {
    File examplesDir = new File("src/test/resources/cql3_examples");
    // listFiles returns null (not an empty array) when the path is not a listable
    // directory; fail with a message that names the offending path.
    File[] exampleFiles = Objects.requireNonNull(
        examplesDir.listFiles(File::isFile),
        "not a listable directory: " + examplesDir.getAbsolutePath());
    for (final File exampleFile : exampleFiles) {
        Path examplePath = exampleFile.toPath();
        String query = Files.readString(examplePath);
        CqlModelParser.parse(query, null);
    }
}
/**
 * Drives {@link CGWorkloadExporter} with the UDT sample schema
 * {@code src/test/resources/testschemas/cql_udt.cql} and the output name
 * {@code cql_udt.yaml}, then requests the workload as YAML.
 *
 * <p>The test makes no assertions; it only exercises the calls below.
 * NOTE(review): the naming template is set after {@code applyAsInt} has been
 * invoked; confirm that this ordering is intended.
 */
@Test
@Disabled
public void testUdt() {
    final CGWorkloadExporter udtExporter = new CGWorkloadExporter();
    final String[] exporterArgs = {
        "src/test/resources/testschemas/cql_udt.cql",
        "cql_udt.yaml"
    };
    udtExporter.applyAsInt(exporterArgs);
    udtExporter.setNamingTemplate("[OPTYPE-][COLUMN-][TYPEDEF-][TABLE-]-[KEYSPACE]");
    udtExporter.getWorkloadAsYaml();
}
}

View File

@ -0,0 +1,27 @@
CREATE KEYSPACE baselines
WITH REPLICATION = {
'class' : 'SimpleStrategy',
'replication_factor' : 1
};
CREATE TYPE baselines.phone (
country_code int,
number text,
);
CREATE TYPE baselines.address (
street text,
city text,
zip text,
phones map<text, phone>
);
CREATE TABLE baselines.user (
name text PRIMARY KEY,
addresses map<text, frozen<address>>
);
ALTER TYPE baselines.address RENAME zip TO zipcode AND city to city_code;
ALTER TYPE baselines.address ADD country text;
DROP TYPE IF EXISTS baselines.address;