/* Copyright (C) 2011 Statoil ASA, Norway. The file 'parser.h' is part of ERT - Ensemble based Reservoir Tool. ERT is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. ERT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License at for more details. */ #ifndef __PARSER_H__ #define __PARSER_H__ #include typedef struct parser_struct parser_type; /** GENERAL OVERVIEW The parser_type is used to create a series of "tokens" from a file or string buffer. In it's simplest form, we define a token as a subset of a string separated by by some split characters. For example, if we define the normal space (i.e. " ") as the only split character, "tokenizing" the string "I like beer " would give the following result: Token number 0 is "I" Token number 1 is "like" Token number 2 is "beer" Note that all the white space (i.e. split characters) have been removed. COMMENTS The parser can ignore comments when tokenzing a file or buffer. To enable this feature, allocate the parser_type with comment_start and comment_end different from NULL. For example if we set both comment_start and comment_end to "##", tokenizing "I ## really ## like beer" would give: Token number 0 is "I" Token number 1 is "like" Token number 2 is "beer" SPECIAL CHARACTERS Some times it can be useful to define a set of characters which behave like white space in the sense that they separate tokens in the source, but they do not get dropped. For example, letting "=" be a special character, tokenzing "key=value" would give: Token number 0 is "key" Token number 1 is "=" Token number 2 is "value" The special characters are given in the "specials" string when allocating the parser. QUOTERS When parsing user input, the user often wants to provide e.g. a filename with a white-space character in it. To support this, the parser can be given a set of quoters. For example, letting " " be white space and adding "'" to the quoters, tokenizing "my_file = 'my documents with space in.txt'" would give: Token number 0 is "my_file" Token number 1 is "=" Token number 2 is "'my documents with space in.txt'" If wanted, the quoting characters can be removed using the strip_quote_marks options when running the parser on the buffer. The last token in the example above would then be: Token number 2 is "my documents with space in.txt" To use one of the quoter characters in a string, place a "\" in front of it. Building on our previous example, let the string be "my_file = 'my \'doc.txt'" Tokenzing this with strip_quote_marks set to true would give: Token number 0 is "my_file" Token number 1 is "=" Token number 2 is "my 'doc.txt" Note that the "\" in front of"'" has been removed. If strip_quote_marks is set to false, the result is: Token number 0 is "my_file" Token number 1 is "=" Token number 2 is "'my \'doc.txt'" */ parser_type * parser_alloc( const char * whitespace, /** Set to NULL if not interessting. */ const char * quoters, /** Set to NULL if not interessting. */ const char * specials, /** Set to NULL if not interessting. */ const char * delete_set, const char * comment_start, /** Set to NULL if not interessting. */ const char * comment_end); /** Set to NULL if not interessting. */ void parser_set_splitters( parser_type * parser , const char * splitters ); void parser_set_quoters( parser_type * parser , const char * quoters ); void parser_set_specials( parser_type * parser , const char * specials ); void parser_set_delete_set( parser_type * parser , const char * delete_set ); void parser_set_comment_start( parser_type * parser , const char * comment_start ); void parser_set_comment_end( parser_type * parser , const char * comment_end ); void parser_free( parser_type * parser); stringlist_type * parser_tokenize_buffer( const parser_type * parser, const char * buffer, bool strip_quote_marks); stringlist_type * parser_tokenize_file( const parser_type * parser, const char * filename, bool strip_quote_marks); /* Pollution by Joakim: */ void parser_strip_buffer(const parser_type * parser , char ** __buffer); bool parser_fseek_string(const parser_type * parser , FILE * stream , const char * string , bool skip_string , bool case_sensitive); char * parser_fread_alloc_file_content(const char * filename , const char * quote_set , const char * delete_set , const char * comment_start , const char * comment_end); #endif