/*description:
{
    Here is the complete grammar of the CodeWorker's scripting language.
    It is expressed in the extended-BNF dialect of CodeWorker, so has the
    advantage of running under \CodeWorker\ for scanning scripts, and has
    the original feature to be auto-descriptive.
}
*/
/* "CodeWorker": a scripting language for parsing and generating text.

Copyright (C) 1996-1997, 1999-2003 Cédric Lemaire

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

To contact the author: codeworker@free.fr
*/

//##markup##"header"
//##begin##"header"
/*********************************************************************
 * Grammar of CodeWorker
 * version 4.5
 *********************************************************************
 *
 * The grammar conforms to the extended-BNF notation of CodeWorker
 *
 *********************************************************************/
//##end##"header"

// Defines how to ignore insignificant characters between tokens.
// Blanks, tabulations, carriage returns and line feeds are skipped, as
// well as C-like block comments (delegated to 'ignoreEmbeddedComment')
// and C++-like line comments (skipped up to the end of the line).
#ignore ::= [' ' | '\t' | '\r' | '\n' | "/*" ignoreEmbeddedComment | "//"->'\n']*;

// The first production rule is the head of the grammar.
// The context variable pointed to by 'this' holds information about the
// type of the script to scan:
//    - "procedural": the backbone of the scripting language,
//    - "extended-BNF": includes also translation scripts,
//    - "template-based": the server-page notation for generation patterns,
// If the type is unknown, an empty string must be passed to the context
// variable.
translation_unit ::= script<this>;

// The type of the script is unknown: the production rule tries the 3
// types.
// This rule is the least declarative. It specifies the current script's type
// for scanning and catches syntax errors for looking at alternatives.
// Note the possible use of a C++-template syntax for non-terminal symbols.
script<""> ::=
        #try
            // '=>' is an escape mode to execute a procedural instruction
            // or block.
            => set this = "procedural";
            script<"procedural">
        // We don't want to stop on error if the script wasn't recognized
        // as a common one.
        #catch(sError1)
    |
        #try
            => set this = "extended-BNF";
            script<"extended-BNF">
        #catch(sError2)
    |
        #try
            => set this = "template-based";
            script<"template-based">
        #catch(sError3)
    |
        // If none of the 3 types, the error for each of them is raised.
        => set this = "";
        => error("Not recognized as a 'CodeWorker' script:\n" + " - procedural script:\n" + sError1 + " - extended-BNF script:\n" + sError2 + " - template-based script:\n" + sError3);
    ;

// The non terminal 'script<"procedural">' scans a common script, such as a
// leader script: not BNF and not template-based.
// An error is raised if a syntax error is encountered (use of '#continue').
script<"procedural"> ::=
    #ignore // ignore blanks and C++ comments between symbols
    #continue // the rest of the sequence must be valid (or the scanner raises a syntax error)
    [instruction]*
    #empty // end of file expected; because of '#continue', a syntax error is raised if not reached
    ;

// The well-named non terminal script<"extended-BNF"> scans an extended-BNF script.
// It expects a set of:
//    - BNF directives (case sensitive or not, ...),
//    - functions declaration/definition,
//    - production rules.
// An error is raised if a syntax error is encountered (use of '#continue').
script<"extended-BNF"> ::= #ignore #continue [BNF_instruction]+ #empty;

// One top-level element of an extended-BNF script: a general directive,
// a function introduced by its keyword (the keyword then selects the
// matching 'instruction<...>' rule), or a production rule.
BNF_instruction ::=
        BNF_general_directive
    |   FUNCTION_KEYWORD:sKeyword #continue instruction<sKeyword>
    |   production_rule
    ;

// The non terminal script<"template-based"> scans a template-based script.
// Procedural instructions or expressions are embedded between @ symbols
// or between <% and %>.
// An error is raised if a syntax error is encountered (use of '#continue').
script<"template-based"> ::=
    #continue
    [
        STARTING_RAW_TEXT
        #continue #ignore
        [
                !preprocessor expression STARTING_TAG_OR_END
            |
                [instruction]* STARTING_TAG_OR_END
        ]
    ]+;

// Called by '#ignore'.
// Jumps to the end of a block comment whose opening "/*" has already
// been consumed by the caller. A "//" met on the way skips the rest of
// the line and then keeps searching for the end of the comment.
// NOTE(review): the "/*" alternative is complete as soon as its single
// recursive call returns, whereas the "//" alternative recurses to keep
// searching for the closing "*/" -- confirm whether nested block
// comments are meant to require their own closing "*/".
ignoreEmbeddedComment ::= ->["*/" | "/*" ignoreEmbeddedComment | "//"->'\n' ignoreEmbeddedComment];

//-------------------------------------------------------------
// The expressions
//-------------------------------------------------------------

// A default expression (non arithmetic) that manipulates and returns
// a string.
expression ::= boolean_expression<false>;

// A condition for 'while', 'if', ... statements.
boolean_expression ::= ternary_expression<""> [boolean_operator #continue ternary_expression<"">]*;

// A concatenation of string expressions: the '+' is interpreted as
// concatenation.
concatenation_expression ::= comparison_member_expression<false>;

// The template non terminal 'expression<bNumeric>' handles a string
// expression when the template variable 'bNumeric' is worth false
// ('false' is a keyword of the language that means an empty string),
// and an arithmetic expression when it is instantiated with true
// (keyword that means "true").
// See the escape mode '$' in 'literal_expression<bNumeric>' to
// understand how to swap in arithmetic mode.
expression<bNumeric> ::= boolean_expression<bNumeric>;

// Generic form of a boolean expression, both for arithmetic and string
// expressions.
boolean_expression<bNumeric> ::= ternary_expression<bNumeric> [boolean_operator #continue ternary_expression<bNumeric>]*;
// The two-character operators are listed before their one-character
// prefixes, so that "&&" is never scanned as '&' followed by '&'.
boolean_operator ::= "&&" | "||" | "^^" | '&' | '|' | '^';

// Non terminal of the C-like ternary operator '?' ':'.
ternary_expression<bNumeric> ::= comparison_expression<bNumeric> ['?' #continue expression<bNumeric> ':' expression<bNumeric>]?;

// Generic form of a comparison (both arithmetic and string).
// A member may also be tested against a constant set with 'in'.
comparison_expression<bNumeric> ::=
    comparison_member_expression<bNumeric>
    [
            comparison_operator #continue comparison_member_expression<bNumeric>
        |   INSET #continue constant_set
    ]*
    ;
// The two-character operators are listed before '<', '=' and '>'.
comparison_operator ::= "<=" | "<>" | ">=" | "!=" | "==" | '<' | '=' | '>';
// A non-empty, comma-separated set of constant strings or chars
// between braces.
constant_set ::=
    '{' #continue
    [CONSTANT_STRING | CONSTANT_CHAR]
    [
        ',' #continue
        [CONSTANT_STRING | CONSTANT_CHAR]
    ]*
    '}'
    ;

// The generic non-terminal 'comparison_member_expression' is quite
// particular. A comparison's member returns either a numeric (stored as
// a string) or a string. Here, 'comparison_member_expression<true>'
// handles an arithmetic expression. It means that it recognizes
// arithmetic operators ('+' means addition instead of concatenation).
comparison_member_expression<true> ::= shift_expression [sum_operator #continue shift_expression]*;
sum_operator ::= PLUS | '-';

// Binary shift operators, as in C/C++/Java ...
// Note: as written, a shift expression is an operand of the sum
// operators, so "<<" and ">>" bind tighter than '+' and '-' here.
shift_expression ::= factor_expression [shift_operator #continue factor_expression]*;
shift_operator ::= "<<" | ">>";

// The multiplication.
factor_expression ::= literal_expression<true> [factor_operator #continue literal_expression<true>]*;
factor_operator ::= '*' | '/' | '%';

// The non-terminal 'comparison_member_expression<false>' recognizes
// string expressions only and '+' means concatenation. Arithmetic
// operators like '-' and '%' aren't available.
comparison_member_expression<false> ::= literal_expression<false> [CONCAT #continue literal_expression<false>]*;

// The generic non terminal of a literal:
//    - string between double quotes,
//    - expression between parenthesis,
//    - arithmetic expression between '$' (cannot be reentrant),
//    - bitwise negation,
//    - constant char (interpreted as a string in CodeWorker),
//    - boolean negation,
//    - number (interpreted as a string in CodeWorker),
//    - predefined constants 'true' (= "true") and 'false' (= ""),
//    - function call,
//    - variable expression, perhaps followed by a method call.
literal_expression<bNumeric> ::=
        CONSTANT_STRING
    |   '(' #continue expression<bNumeric> ')'
        // '$' is only accepted when not already in arithmetic mode.
    |   '$' #continue #check(!bNumeric) expression<true> '$'
        // '~' is only accepted in arithmetic mode.
    |   '~' #continue #check(bNumeric) literal_expression<true>
    |   CONSTANT_CHAR
    |   '!' #continue literal_expression<bNumeric>
    |   #readNumeric // scans a number
    |
        // '#readIdentifier' is a predefined non terminal that scans
        // a C-like identifier.
        //    - A:{"s1", ..., "sN"} means that the token must be worth
        //      a constant string of the set,
        //    - A:var means that the token value is assigned to the
        //      variable 'var'. If the variable doesn't exist yet, it is
        //      declared in the local scope,
        //    - A:{"s1", ..., "sN"}:var means that the token must belong
        //      to the set and that the value is assigned to the variable.
        #readIdentifier:{"true", "false"}
    |   function_call
    |   variable_expression ['.' #continue method_call]?
    ;

// Non terminal of a variable.
// It is either an identifier followed by its right-side, or a call to
// '#evaluateVariable(...)'.
variable_expression ::=
        #readIdentifier:sIdentifier variable_expression<sIdentifier>
    |   '#' #continue "evaluateVariable" '(' expression ')'
    ;

// Non terminal of a script file to execute (parse/generation/interpretation).
// Usually, it represents the file name of the script, but it may be the
// full description of the script to execute, between brackets.
script_file_expression<"free"> ::=
        '{' #continue [instruction]* '}'
    |   expression
    ;
// Inlined template-based script: 'bContinue' is cleared as soon as the
// closing '}' is met in place of a starting tag, which stops the
// repetition through '#check(bContinue)'.
script_file_expression<"pattern"> ::=
        '{' #continue
        => local bContinue = true;
        [
            #check(bContinue)
            STARTING_RAW_TEXT
            #continue #ignore
            [
                    !preprocessor expression [STARTING_TAG | '}' => set bContinue = false;]
                |
                    [instruction]* [STARTING_TAG | '}' => set bContinue = false;]
            ]
        ]+
    |   expression
    ;
// A translation script is scanned like a BNF script.
script_file_expression<"translate"> ::= script_file_expression<"BNF">;
script_file_expression<"BNF"> ::=
        '{' #continue [BNF_instruction]* '}'
    |   expression
    ;

// The right-side of a variable. '#!ignore' in the non-terminal declaration
// part of the production rule means that neither blanks nor C++-like comments
// must be scanned before calling the non-terminal. Because of an ambiguity
// on '#' and '[' with the extended-BNF syntax, whitespaces aren't allowed
// before them in a variable expression, while a BNF directive (#...) or a
// repeatable sequence ([...]...) must have at least a blank or comment before
// them.
//
//    - points to a subnode with '.' as in C/C++/Java... for accessing
//      the attributes of a structure,
//    - points to an item of the current node's array,
//    - accesses the first/last item of the array or the parent's node,
//    - accesses the nth item of the array (starting at 0),
variable_expression<sIdentifier> : #!ignore ::=
    [
            // A '.' followed by an identifier is a subnode, unless the
            // identifier turns out to be the start of a method call.
            #ignore '.' #readIdentifier ![['<' concatenation_expression '>']? '(']
        |   '[' #ignore #continue expression ']'
        |   '#'
            [
                    VARIABLE_SPECIAL_ACCESSOR
                |   '[' #ignore #continue expression ']'
            ]
    ]*
    ;

// A method call consists of calling a function where (generally) the first
// parameter is provided as an expression on the left-side:
//     sText.findString('/');
// calls the function
//     findString(sText, '/');
// For some predefined functions, the expression doesn't represent
// the first parameter:
//     list.findElement("BNF");
// calls the function
//     findElement("BNF", list);
// where 'list' occupied the second position.
method_call ::= #readIdentifier:sMethodName [ predefined_method_call<sMethodName> | user_method_call ];
// A user-defined method may be instantiated with a template key
// between '<' and '>'.
user_method_call ::= ['<' concatenation_expression '>']? '(' #continue [expression [',' #continue expression]*]? ')';

// Call of a predefined/user-defined function.
// The predefined functions are tried first, then a function qualified
// with '::', and finally a user-defined function.
function_call ::= #readIdentifier:sFunctionName [ predefined_function_call<sFunctionName> | module_function_call | user_function_call ];
module_function_call ::= "::" #readIdentifier:sFunctionName '(' #continue [expression [',' #continue expression]*]? ')';
user_function_call ::= ['<' concatenation_expression '>']? '(' #continue [expression [',' #continue expression]*]? ')';

//-------------------------------------------------------------
// The instructions
//-------------------------------------------------------------

// The non-terminal of an instruction:
//    - a block of instructions,
//    - a simple statement,
//    - a call to a predefined function,
//    - a call to a predefined procedure,
//    - a call to a user function,
//    - a preprocessor directive,
//    - a server page's raw text (between @ or %> <%).
instruction ::=
        '{' #continue [instruction]* '}'
    |
        #readIdentifier:sKeyword
        [
                instruction<sKeyword>
            |   predefined_function_call<sKeyword> ';'
            |   predefined_procedure_call<sKeyword> ';'
            |   module_function_call ';'
            |   user_function_call ';'
        ]
    |   preprocessor
    |   variable_expression '.' #continue method_call ';'
    |
        // Raw text is only accepted when the script isn't a procedural one.
        #check(this != "procedural")
        STARTING_TAG
        #!ignore #continue
        STARTING_ENDING_RAW_TEXT
        #ignore
        [!preprocessor expression ![!'@' !"%>" !#empty]]?
    ;

// Looks for a preprocessing directive
preprocessor ::= '#' #readIdentifier:sKeyword preprocessor<sKeyword>;

// The non-terminal 'preprocessor<"include">' includes a script file.
preprocessor<"include"> ::= #continue CONSTANT_STRING;

// The non-terminal 'preprocessor<"coverage">' records the coverage of
// an output file by a template-based script.
preprocessor<"coverage"> ::= #continue '(' variable_expression ')';

// The non-terminal 'preprocessor<"matching">' records the coverage of
// an input file by an extended-BNF script.
preprocessor<"matching"> ::= #continue '(' variable_expression ')';

// The non-terminal 'preprocessor<"jointpoint">' is called into an advice
// (Aspect-Oriented Programming with template-based scripts).
preprocessor<"jointpoint"> ::= ['(' #continue variable_expression ')']?;

// The 'use' directive loads a dynamic library, whose name is the name
// of the module ending with "cw".
// Then, it adds new commands in CodeWorker.
// The module name is scanned as an identifier.
// Example: #use PGSQL  -> load of "PGSQLcw.dll"
preprocessor<"use"> ::= #continue #readIdentifier;

// The generic form 'instruction<sIdentifier>' is called when the keyword
// wasn't recognized as a statement. It might be an assignment.
instruction<sIdentifier> ::= variable_expression<sIdentifier> ['=' | "+="] #continue expression ';';

//------------------ Some classical statements ------------------
instruction<"if"> ::= #continue boolean_expression instruction [ELSE #continue instruction]?;
instruction<"do"> ::= #continue instruction WHILE boolean_expression ';';
instruction<"while"> ::= #continue boolean_expression instruction;

// The 'switch' statement works on strings. The 'start' label takes the
// flow of control if the controlled sequence starts with the corresponding
// constant expression.
instruction<"switch"> ::= #continue '(' expression ')' switch_body;
switch_body ::=
    '{' #continue
    [
        [
                DEFAULT
            |   [CASE | START] #continue CONSTANT_STRING
        ]
        ':' [instruction]*
    ]*
    '}';

//------------------ Some assignment operators ------------------

// The right member of an assignment operator may describe a constant tree
// to assign to the variable (the left member).
assignment_expression ::=
        '{' #continue
        [
            assignment_expression
            [',' #continue assignment_expression]*
        ]?
        '}'
    |   expression
    ;

// Declare a local variable on the stack as a tree.
// The scope manages its lifetime. A value may be assigned to the variable.
instruction<"local"> ::= #continue variable_expression ['=' #continue assignment_expression]? ';';

// Declare a global variable visible everywhere. A value may be assigned
// to the variable.
instruction<"global"> ::= #continue variable_expression ['=' #continue assignment_expression]? ';';

// Declare a local variable and assign a reference to another node.
//     localref A = B;
// is the equivalent of:
//     local A;
//     ref A = B;
instruction<"localref"> ::= #continue variable_expression '=' variable_expression ';';

// Copy a node to another integrally, after cleaning the destination node.
instruction<"setall"> ::= #continue variable_expression '=' variable_expression ';';

// Merge a node to another integrally.
instruction<"merge"> ::= #continue variable_expression '=' variable_expression ';';

// Classical assignment of a value to a node. If the node doesn't exist
// yet, a warning is displayed but the node is created and the value
// assigned. It is better to use 'insert' to create a node.
instruction<"set"> ::= #continue variable_expression ["+=" | '='] assignment_expression ';';

// Assignment of a value to a node. If the node doesn't exist yet, it is created.
// If nothing has to be assigned, the node is just created.
instruction<"insert"> ::= #continue variable_expression [["+=" | '='] #continue assignment_expression]? ';';

// Assigns a reference to another node.
instruction<"ref"> ::= #continue variable_expression '=' variable_expression ';';

// Adds a new item in an array, whose key is worth the position of the item
// in the array (the last) starting at 0.
instruction<"pushItem"> ::= #continue variable_expression ['=' #continue expression]? ';';

// The statement 'foreach' iterates items of an array.
// It may sort items before, taking the case into account or not.
// It may propagate the iteration on branches, which have the same
// name as the array.
// Example: 'foreach i in cascading a.b.c ...'
// will propagate the 'foreach' on 'i.c' and so on recursively.
instruction<"foreach"> ::=
    #continue #readIdentifier IN
    [
            [REVERSE]? SORTED [NO_CASE]?
        |   CASCADING [#readIdentifier:{"first", "last"}]?
    ]*
    variable_expression
    instruction
    ;

// The 'continue' statement, same meaning as in C/C++/Java.
instruction<"continue"> ::= #continue ';';

// The 'break' statement, same meaning as in C/C++/Java.
instruction<"break"> ::= #continue ';';

// The statement 'forfile' browses a directory and iterates all
// files matching a pattern. The search is recursive on directories
// if 'cascading' is chosen.
instruction<"forfile"> ::=
    #continue #readIdentifier IN
    [
            [REVERSE]? SORTED [NO_CASE]?
        |   CASCADING [#readIdentifier:{"first", "last"}]?
    ]*
    expression
    instruction
    ;

// The statement 'select' crosscuts all tree nodes that match a
// pattern of branch, in the spirit of XPath (XSL).
instruction<"select"> ::=
    #continue #readIdentifier IN
    [SORTED]?
    motif_expression
    instruction
    ;

// The non-terminal 'motif_expression' defines a kind of XPath expression
// to apply on a subtree.
motif_expression ::=
    [
            '(' #continue motif_expression ')'
        |   motif_and_expression
    ]
    [
        ["||" | '|'] #continue motif_and_expression
    ]*;
motif_and_expression ::= motif_concat_expression [["&&" | '&'] #continue motif_concat_expression]*;
motif_concat_expression ::= motif_path_expression ['+' #continue motif_path_expression]*;
// A path is a sequence of steps separated by '.', or by "..." to
// introduce an ellipsis step.
motif_path_expression ::=
    motif_step_expression
    [
            "..." #continue motif_ellipsis_expression
        |   '.' #continue motif_step_expression
    ]*;
motif_ellipsis_expression ::= motif_step_expression;
// A step is '*' or an identifier, followed by any number of bracketed
// (possibly empty) expressions.
motif_step_expression ::=
    #continue
    ['*' | #readIdentifier]
    ['[' [expression]? ']']*
    ;

//---------- Declaration / definition of user-defined functions ----------

// The definition of a user-defined function starts with the keyword
// 'function'. A function may have a kind of template form, instantiated
// with a key between '<' and '>'.
instruction<"function"> ::= #continue #readIdentifier ['<' #continue CONSTANT_STRING '>']? '(' [function_parameter [',' #continue function_parameter]*]? ')' function_body;

// Function definition without any template key.
classical_function_definition ::= #readIdentifier '(' #continue function_parameters ')' function_body;
// Function definition whose template key is a constant string.
instantiated_template_function_definition ::= #readIdentifier '<' #continue CONSTANT_STRING '>' #continue '(' function_parameters ')' function_body;
// Function definition whose template key is an identifier; its body
// may be written as a template-based body between "{{" and "}}".
generic_template_function_definition ::= #readIdentifier '<' #continue #readIdentifier '>' #continue '(' function_parameters ')' [template_function_body | function_body];

// A possibly empty, comma-separated list of parameters.
function_parameters ::= [function_parameter [',' #continue function_parameter]*]?;
// A parameter is an identifier, optionally followed by ':' and the
// passing mode.
function_parameter ::= #readIdentifier [':' #continue function_parameter_type]?;
function_parameter_type ::= #readIdentifier:{"value", "variable", "node", "reference", "index"};
function_body ::= #continue '{' [instruction]* '}';
// A template-based body: the repetition is left with '#break' when a
// '}' is met in place of a starting tag, and a second '}' is then
// expected to close the body.
template_function_body ::=
    "{{" #continue
    [
        STARTING_RAW_TEXT
        #continue #ignore
        [
                !preprocessor expression [STARTING_TAG | '}' #break]
            |
                [instruction]* [STARTING_TAG | '}' #break]
        ]
    ]+
    '}'
    ;

// Forward declaration of a function.
instruction<"declare"> ::= #continue FUNCTION #readIdentifier ['<' #continue CONSTANT_STRING '>']? '(' [function_parameter [',' #continue function_parameter]*]? ')' ';';

// External function: binding with a C++ implementation of the function,
// defined by the user.
instruction<"external"> ::= #continue FUNCTION #readIdentifier ['<' #continue CONSTANT_STRING '>']? '(' [function_parameter [',' #continue function_parameter]*]? ')' ';';

// The hook 'readonlyHook' is called when the tool tries to save a generated
// file but that the replaced file is locked for writing. The name of the file
// is passed by value. It must return a non-empty value if the file has been
// unlocked in the body (succeeded call to the source code control system).
instruction<"readonlyHook"> ::= #continue '(' #readIdentifier ')' function_body;

// The hook 'writefileHook' declares three parameters and a body.
// NOTE(review): the meaning of the three parameters isn't described in
// this file -- confirm against the CodeWorker reference manual.
instruction<"writefileHook"> ::= #continue '(' #readIdentifier ',' #readIdentifier ',' #readIdentifier ')' function_body;

// The hook 'stepintoHook' is called when entering into a BNF non-terminal call,
// just before running the production rule.
// It passes first the signature of the BNF clause, and then the local scope.
instruction<"stepintoHook"> ::= #continue '(' #readIdentifier ',' #readIdentifier ')' function_body;

// The hook 'stepoutHook' is called when finishing a BNF non-terminal call,
// just after running the production rule.
// It passes the signature of the BNF clause, then the local scope and then
// a boolean, worth "true" on success.
instruction<"stepoutHook"> ::= #continue '(' #readIdentifier ',' #readIdentifier ',' #readIdentifier ')' function_body;

// Returns the value of a user-defined function. It is never a node.
// If no expression is returned, one considers returning the value of a hidden
// local variable having the name of the function.
instruction<"return"> ::= #continue [expression]? ';';

// Classical 'try/catch' statement. The tool puts the error message into a
// variable.
instruction<"try"> ::= #continue instruction "catch" '(' variable_expression ')' instruction;

// The statement 'finally' defines a block to execute each time the flow
// of control leaves the scope of the function, even in case of exception
// raising.
instruction<"finally"> ::= #continue instruction;

// Deprecated way to call a function, ignoring the output result.
instruction<"nop"> ::= #continue '(' function_call ')' ';';

// A 'jointpoint' statement to declare into a template-based script (Aspect-oriented construct)
instruction<"jointpoint"> ::=
    #continue
    [#readIdentifier:"iterate"]?
    #readIdentifier
    ['(' #continue variable_expression ')']?
    [';' | instruction]
    ;

// An 'advice' statement to declare into a template-based script (Aspect-oriented construct)
instruction<"advice"> ::=
    #continue
    ADVICE_TYPE
    ['(' #continue #readIdentifier ')']?
    ':' expression
    instruction
    ;

//---------- Statement modifiers ----------

// Choose a file as the standard input (function 'inputLine()').
instruction<"file_as_standard_input"> ::= #continue '(' expression ')' instruction;

// Choose a string as the standard input (function 'inputLine()').
instruction<"string_as_standard_input"> ::= #continue '(' expression ')' instruction;

// Redirects all console outputs to a variable while running an instruction.
instruction<"quiet"> ::= #continue '(' variable_expression ')' instruction;

// Measures the time consumed by an instruction. Use 'getLastDelay()' to
// take the value in milliseconds after running the instruction.
instruction<"delay"> ::= #continue instruction;

// Runs an instruction under the integrated debug mode, running to the
// console.
instruction<"debug"> ::= #continue instruction;

// Runs an instruction under the integrated quantify mode, measuring time
// consuming in functions and the number of times each line of script is
// visited.
instruction<"quantify"> ::= #continue ['(' #continue expression ')']? instruction;

// The 'project' tree is the main tree of the application, a global tree.
// To change it locally, just for running an instruction, use 'new_project'.
instruction<"new_project"> ::= #continue instruction;

// In a translation or BNF script, change of the current parsed file.
instruction<"parsed_file"> ::= #continue '(' expression ')' instruction;

// In a translation or BNF script, change of the current input to a parsed string.
instruction<"parsed_string"> ::= #continue '(' expression ')' instruction;

// In a translation or template-based script, change of the current
// generated file.
instruction<"generated_file"> ::= #continue '(' expression ')' instruction;

// In a translation or template-based script, change of the current
// output to an appending mode in a given file.
instruction<"appended_file"> ::= #continue '(' expression ')' instruction;

// In a translation or template-based script, change of the current
// output to a string instead of a file.
instruction<"generated_string"> ::= #continue '(' variable_expression ')' instruction;

//---------------------------------------------------------------------
// Some lexical tokens
//---------------------------------------------------------------------

// A '+' that isn't immediately followed by '=' (so that "+=" isn't
// scanned as an addition or a concatenation).
PLUS ::= '+' #!ignore !'=';
CONCAT ::= '+' #!ignore !'=';

// Keywords, each scanned as a whole identifier.
DEFAULT ::= #readIdentifier:"default";
CASE ::= #readIdentifier:"case";
START ::= #readIdentifier:"start";
CASCADING ::= #readIdentifier:"cascading";
ELSE ::= #readIdentifier:"else";
IN ::= #readIdentifier:"in";
INSET ::= #readIdentifier:"in";
NO_CASE ::= #readIdentifier:"no_case";
REVERSE ::= #readIdentifier:"reverse";
SORTED ::= #readIdentifier:"sorted";
WHILE ::= #readIdentifier:"while";

CONSTANT_STRING ::= #readCString;
CONSTANT_CHAR