/*description:
{
    Here is the complete grammar of the CodeWorker's scripting language.
    It is expressed in the extended-BNF dialect of CodeWorker, so has the
    advantage of running under \CodeWorker\ for scanning scripts, and has the
    original feature to be auto-descriptive.
}
*/
/*
"CodeWorker": a scripting language for parsing and generating text.

Copyright (C) 1996-1997, 1999-2003 Cédric Lemaire

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

To contact the author: codeworker@free.fr
*/

// Insignificant characters skipped between tokens: blanks, tabulations,
// end-of-line characters, C-like comments (delegated to
// 'ignoreEmbeddedComment') and C++-like comments running up to the end of
// the line.
#ignore ::=
    [
            ' '
        |   '\t'
        |   '\r'
        |   '\n'
        |   "/*" ignoreEmbeddedComment
        |   "//"->'\n'
    ]*;

// Head of the grammar: the first production rule of the script.
// The context variable pointed to by 'this' tells which type of script has
// to be scanned:
//    - "procedural": the backbone of the scripting language,
//    - "extended-BNF": includes also translation scripts,
//    - "template-based": the server-page notation for generation patterns.
// When the type is unknown, the context variable must hold an empty string.
translation_unit ::= script<this>;

// The type of the script is unknown: the production rule tries the 3
// types.
// This rule is the least declarative one. It specifies the current script's
// type for scanning and catches syntax errors for looking at alternatives.
// Note the possible use of a C++-template syntax for non-terminal symbols.
script<""> ::=
        #try
            // '=>' is an escape mode to execute a procedural instruction
            // or block.
            => set this = "procedural";
            script<"procedural">
        // We don't want to stop on error if the script wasn't recognized
        // as a common one.
        #catch(sError1)
    |
        #try
            => set this = "extended-BNF";
            script<"extended-BNF">
        #catch(sError2)
    |
        #try
            => set this = "template-based";
            script<"template-based">
        #catch(sError3)
    |
        // If none of the 3 types, the error for each of them is raised.
        => set this = "";
        => error("Not recognized as a 'CodeWorker' script:\n" +
                 " - procedural script:\n" + sError1 +
                 " - extended-BNF script:\n" + sError2 +
                 " - template-based script:\n" + sError3);
    ;

// The non terminal 'script<"procedural">' scans a common script, such as a
// leader script: not BNF and not template-based.
// An error is raised if a syntax error is encountered (use of '#continue').
script<"procedural"> ::=
    #ignore     // ignore blanks and C++ comments between symbols
    #continue   // the rest of the sequence must be valid (or the scanner raises a syntax error)
    [instruction]+
    #empty      // end of file expected; because of '#continue', a syntax error is raised if not reached
    ;

// The well-named non terminal script<"extended-BNF"> scans an extended-BNF
// script. It expects a set of:
//    - BNF directives (case sensitive or not, ...),
//    - functions declaration/definition,
//    - production rules.
// An error is raised if a syntax error is encountered (use of '#continue').
script<"extended-BNF"> ::=
    #ignore
    #continue
    [
            BNF_general_directive
        |
            // '#readIdentifier' is a predefined non terminal that scans
            // a C-like identifier.
            //    - A:{"s1", ..., "sN"} means that the token must be worth
            //      a constant string of the set,
            //    - A:var means that the token value is assigned to the
            //      variable 'var'. If the variable doesn't exist yet, it is
            //      declared in the local scope,
            //    - A:{"s1", ..., "sN"}:var means that the token must belong
            //      to the set and that the value is assigned to the variable.
            #readIdentifier:{"function", "declare", "external"}:sKeyword
            #continue
            instruction<sKeyword>
        |
            production_rule
    ]+
    #empty;

// The non terminal script<"template-based"> scans a template-based script.
// Procedural instructions or expressions are embedded between @ symbols
// or between <% and %>.
// An error is raised if a syntax error is encountered (use of '#continue').
script<"template-based"> ::=
    #continue
    [
        ->['@' | "<%"]
        #continue
        #ignore
        [
                expression ['@' | "%>" | #empty]
            |
                [instruction]* ['@' | "%>" | #empty]
        ]
    ]+;

// Called by '#ignore' once the opening "/*" has been consumed: jumps to the
// closing "*/", recursing on nested "/*" comments and skipping "//" comments
// up to the end of their line.
ignoreEmbeddedComment ::=
    ->[
            "*/"
        |   "/*" ignoreEmbeddedComment
        |   "//"->'\n' ignoreEmbeddedComment
    ];


//-------------------------------------------------------------
//                       The expressions
//-------------------------------------------------------------

// A default expression (non arithmetic) that manipulates and returns
// a string.
expression ::= boolean_expression<false>;

// A condition for 'while', 'if', ... statements.
// This non-template rule has no 'bNumeric' parameter in scope, so it must not
// refer to it: it delegates to the generic rule instantiated with 'false'
// (string mode), exactly like 'expression' and 'concatenation_expression'.
boolean_expression ::= boolean_expression<false>;

// A concatenation of string expressions: the '+' is interpreted as
// concatenation.
concatenation_expression ::= comparison_member_expression<false>;

// The template non terminal 'expression<bNumeric>' handles a string
// expression when the template variable 'bNumeric' is worth false
// ('false' is a keyword of the language that means an empty string),
// and an arithmetic expression when it is instantiated with true
// (keyword that means "true").
// See the escape mode '$' in 'literal_expression<bNumeric>' to
// understand how to swap in arithmetic mode.
expression<bNumeric> ::= boolean_expression<bNumeric>;

// Generic form of a boolean expression, both for arithmetic and string
// expressions.
boolean_expression<bNumeric> ::=
    ternary_expression<bNumeric>
    [
        boolean_operator
        #continue
        ternary_expression<bNumeric>
    ]*;

// Logical operators. The two-character forms are listed before the
// one-character forms they start with.
boolean_operator ::=
        "&&"
    |   "||"
    |   "^^"
    |   '&'
    |   '|'
    |   '^'
    ;

// The C-like ternary operator '?' ':'. Once '?' has been read, both branches
// are mandatory ('#continue').
ternary_expression<bNumeric> ::=
    comparison_expression<bNumeric>
    [
        '?'
        #continue
        expression<bNumeric> ':' expression<bNumeric>
    ]?;

// Generic form of a comparison, shared by arithmetic and string expressions.
comparison_expression<bNumeric> ::=
    comparison_member_expression<bNumeric>
    [
        comparison_operator
        #continue
        comparison_member_expression<bNumeric>
    ]*;

// Comparison operators. The two-character forms come first.
comparison_operator ::=
        "<="
    |   "<>"
    |   ">="
    |   "!="
    |   "=="
    |   '<'
    |   '='
    |   '>'
    ;

// The generic non-terminal 'comparison_member_expression' is quite
// particular. A member of a comparison returns either a numeric (stored as
// a string) or a string. The instantiation
// 'comparison_member_expression<true>' handles an arithmetic expression:
// it recognizes arithmetic operators, so '+' means addition instead of
// concatenation.
comparison_member_expression<true> ::=
    shift_expression
    [
        sum_operator
        #continue
        shift_expression
    ]*;

// Addition and subtraction.
sum_operator ::= '+' | '-';

// Binary shift operators, written as in C/C++/Java. In this grammar they
// are operands of the sum operators.
shift_expression ::=
    factor_expression
    [
        shift_operator
        #continue
        factor_expression
    ]*;

shift_operator ::= "<<" | ">>";

// Multiplication, division and modulo, applied to arithmetic literals.
factor_expression ::=
    literal_expression<true>
    [
        factor_operator
        #continue
        literal_expression<true>
    ]*;

factor_operator ::= '*' | '/' | '%';

// The non-terminal 'comparison_member_expression<false>' recognizes
// string expressions only and '+' means concatenation. Arithmetic
// operators like '-' and '%' aren't available.
comparison_member_expression<false> ::=
    literal_expression<false>
    [
        '+'
        #continue
        literal_expression<false>
    ]*;

// The generic non terminal of a literal:
//    - string between double quotes,
//    - expression between parentheses,
//    - arithmetic expression between '$' (cannot be reentrant: only accepted
//      when 'bNumeric' is false),
//    - bitwise negation (only accepted when 'bNumeric' is true),
//    - constant char (interpreted as a string in CodeWorker),
//    - boolean negation,
//    - number (interpreted as a string in CodeWorker),
//    - predefined constants 'true' (= "true") and 'false' (= ""),
//    - function call,
//    - variable expression, perhaps followed by a method call.
literal_expression<bNumeric> ::=
        CONSTANT_STRING
    |
        '(' #continue expression<bNumeric> ')'
    |
        '$' #continue #check(!bNumeric) expression<true> '$'
    |
        '~' #continue #check(bNumeric) literal_expression<true>
    |
        CONSTANT_CHAR
    |
        '!' #continue literal_expression<bNumeric>
    |
        #readNumeric // scans a number
    |
        #readIdentifier:{"true", "false"}
    |
        function_call
    |
        variable_expression ['.' #continue method_call]?
    ;

// Non terminal of a variable: either an identifier followed by its
// right-side, or the directive '#evaluateVariable(expression)'.
variable_expression ::=
        #readIdentifier:sIdentifier
        variable_expression<sIdentifier>
    |
        '#' #continue "evaluateVariable" '(' expression ')'
    ;

// The right-side of a variable. '#!ignore' in the non-terminal declaration
// part of the production rule means that neither blanks nor C++-like comments
// must be scanned before calling the non-terminal. Because of an ambiguity
// on '#' and '[' with the extended-BNF syntax, whitespaces aren't allowed
// before them in a variable expression, while a BNF directive (#...) or a
// repeatable sequence ([...]...) must have at least a blank or comment before
// them.
//
//    - points to a subnode with '.' as in C/C++/Java... for accessing
//      the attributes of a structure (the step is rejected when the
//      identifier is followed by an optional '<...>' and a '(' --
//      presumably so that a method call is left to 'method_call'),
//    - points to an item of the current node's array,
//    - accesses the first/last item of the array or the parent's node,
//    - accesses the nth item of the array (starting at 0).
variable_expression<sIdentifier> : #!ignore ::=
    [
            #ignore '.' #readIdentifier
            ![['<' concatenation_expression '>']? '(']
        |
            '[' #ignore #continue expression ']'
        |
            '#'
            [
                    #readIdentifier:{"front", "back", "parent"}
                |
                    '[' #ignore #continue expression ']'
            ]
    ]*
    ;

// A method call consists of calling a function where (generally) the first
// parameter is provided as an expression on the left-side:
//     sText.findString('/');
// calls the function
//     findString(sText, '/');
// For some predefined functions, the expression doesn't represent
// the first parameter:
//     list.findElement("BNF");
// calls the function
//     findElement("BNF", list);
// where 'list' occupied the second position.
//
// 'user_method_call' is declared as a plain (non-template) rule, so it is
// invoked without a template argument, the same way 'function_call' invokes
// 'user_function_call'.
method_call ::=
    #readIdentifier:sMethodName
    [
            predefined_method_call<sMethodName>
        |
            user_method_call
    ];

// Arguments of a user-defined method: an optional template key between
// '<' and '>', then a parenthesized, comma-separated list of expressions.
user_method_call ::=
    ['<' concatenation_expression '>']?
    '('
    #continue
    [expression [',' #continue expression]*]?
    ')';

// Call of a predefined/user-defined function.
function_call ::=
    #readIdentifier:sFunctionName
    [
            predefined_function_call<sFunctionName>
        |
            user_function_call
    ];

// Arguments of a user-defined function: an optional template key between
// '<' and '>', then a parenthesized, comma-separated list of expressions.
user_function_call ::=
    ['<' concatenation_expression '>']?
    '('
    #continue
    [expression [',' #continue expression]*]?
    ')';


//-------------------------------------------------------------
//                       The instructions
//-------------------------------------------------------------

// The non-terminal of an instruction:
//    - a block of instructions,
//    - a simple statement,
//    - a call to a predefined function,
//    - a call to a predefined procedure,
//    - a call to a user function,
//    - a preprocessor directive,
//    - a server page's raw text (between @ or %> <%), refused when the
//      script type held by 'this' is "procedural".
instruction ::=
        '{' #continue [instruction]* '}'
    |
        #readIdentifier:sKeyword
        [
                instruction<sKeyword>
            |
                predefined_function_call<sKeyword> ';'
            |
                predefined_procedure_call<sKeyword> ';'
            |
                user_function_call ';'
        ]
    |
        '#' #readIdentifier:sKeyword preprocessor<sKeyword>
    |
        #check(this != "procedural")
        ['@' | "%>"]
        #!ignore
        #continue
        ->['@' | "<%" | #empty]
        #ignore
        [expression ![!'@' !"%>" !#empty]]?
    ;

// The non-terminal 'preprocessor<"include">' includes a script file.
preprocessor<"include"> ::= #continue CONSTANT_STRING;

// The generic form 'instruction<sIdentifier>' is called when the keyword
// wasn't recognized as a statement. It might be an assignment.
instruction<sIdentifier> ::=
    variable_expression<sIdentifier>
    ['=' | "+="]
    #continue
    expression ';';


//------------------ Some classical statements ------------------

instruction<"if"> ::=
    #continue
    boolean_expression instruction
    [ELSE #continue instruction]?;

instruction<"do"> ::=
    #continue
    instruction WHILE boolean_expression ';';

instruction<"while"> ::=
    #continue
    boolean_expression instruction;

// The 'switch' statement works on strings. The 'start' label takes the
// flow of control if the controlled sequence starts with the corresponding
// constant expression.
instruction<"switch"> ::=
    #continue
    '(' expression ')' switch_body;

switch_body ::=
    '{'
    #continue
    [
        [
                DEFAULT
            |
                [CASE | START] #continue CONSTANT_STRING
        ]
        ':'
        [instruction]*
    ]*
    '}';


//------------------ Some assignment operators ------------------

// Declare a local variable on the stack as a tree. The scope manages its
// lifetime. A value may be assigned to the variable.
instruction<"local"> ::=
    #continue
    variable_expression ['=' #continue expression]? ';';

// Declare a global variable visible everywhere. A value may be assigned
// to the variable.
instruction<"global"> ::=
    #continue
    variable_expression ['=' #continue expression]? ';';

// Declare a local variable and assign a reference to another node.
//     localref A = B;
// is the equivalent of:
//     local A;
//     ref A = B;
instruction<"localref"> ::=
    #continue
    variable_expression '=' variable_expression ';';

// Copy a node to another integrally, after cleaning the destination node.
instruction<"setall"> ::=
    #continue
    variable_expression '=' variable_expression ';';

// Merge a node to another integrally.
instruction<"merge"> ::=
    #continue
    variable_expression '=' variable_expression ';';

// Classical assignment of a value to a node. If the node doesn't exist
// yet, a warning is displayed but the node is created and the value
// assigned. It is better to use 'insert' to create a node.
instruction<"set"> ::=
    #continue
    variable_expression ["+=" | '='] expression ';';

// Assignment of a value to a node. If the node doesn't exist yet, it is
// created. If nothing has to be assigned, the node is just created.
instruction<"insert"> ::=
    #continue
    variable_expression
    [
        ["+=" | '=']
        #continue
        expression
    ]?
    ';';

// Assigns a reference to another node.
instruction<"ref"> ::=
    #continue
    variable_expression '=' variable_expression ';';

// Adds a new item in an array, whose key is worth the position of the item
// in the array (the last) starting at 0.
instruction<"pushItem"> ::=
    #continue
    variable_expression ['=' #continue expression]? ';';

// The statement 'foreach' iterates items of an array.
// It may sort items before, taking the case into account or not.
// It may propagate the iteration on branches, which have the same
// name as the array. Example: 'foreach i in cascading a.b.c ...'
// will propagate the 'foreach' on 'i.c' and so on recursively.
instruction<"foreach"> ::=
    #continue
    #readIdentifier IN
    [
            SORTED [NO_CASE]?
        |
            CASCADING [#readIdentifier:{"first", "last"}]?
    ]*
    variable_expression
    instruction
    ;

// The 'continue' statement, same meaning as in C/C++/Java.
instruction<"continue"> ::= #continue ';';

// The 'break' statement, same meaning as in C/C++/Java.
instruction<"break"> ::= #continue ';';

// The statement 'forfile' browses a directory and iterates all
// files matching a pattern. The search is recursive on directories
// if 'cascading' is chosen.
instruction<"forfile"> ::=
    #continue
    #readIdentifier IN
    [
            SORTED [NO_CASE]?
        |
            CASCADING [#readIdentifier:{"first", "last"}]?
    ]*
    expression
    instruction
    ;

// The statement 'select' crosscuts all tree nodes that match a
// pattern of branch, in the spirit of XPath (XSL).
instruction<"select"> ::=
    #continue
    #readIdentifier IN
    [SORTED]?
    motif_expression
    instruction
    ;

// The non-terminal 'motif_expression' defines a kind of XPath expression
// to apply on a subtree: a parenthesized motif or an and-expression,
// followed by any number of alternatives introduced by "||" or '|'.
motif_expression ::=
    [
            '(' #continue motif_expression ')'
        |
            motif_and_expression
    ]
    [
        ["||" | '|']
        #continue
        motif_and_expression
    ]*;

// Conjunction of motifs, with "&&" or '&'.
motif_and_expression ::=
    motif_concat_expression
    [
        ["&&" | '&']
        #continue
        motif_concat_expression
    ]*;

// Motifs chained with '+'.
motif_concat_expression ::=
    motif_path_expression
    [
        '+'
        #continue
        motif_path_expression
    ]*;

// A path of steps, separated either by "..." (ellipsis) or by '.'.
motif_path_expression ::=
    motif_step_expression
    [
            "..." #continue motif_ellipsis_expression
        |
            '.' #continue motif_step_expression
    ]*;

// The step that follows an ellipsis.
motif_ellipsis_expression ::= motif_step_expression;

// A step is '*' or an identifier, followed by any number of bracketed
// (possibly empty) expressions.
motif_step_expression ::=
    #continue
    ['*' | #readIdentifier]
    ['[' [expression]? ']']*
    ;


//---------- Declaration / definition of user-defined functions ----------

// The definition of a user-defined function starts with the keyword
// 'function'. A function may have a kind of template form, instantiated
// with a key between '<' and '>'.
instruction<"function"> ::=
    #continue
    #readIdentifier
    ['<' #continue CONSTANT_STRING '>']?
    '(' [function_parameter [',' #continue function_parameter]*]? ')'
    function_body;

// A parameter name, optionally followed by ':' and its passing mode.
function_parameter ::=
    #readIdentifier
    [
        ':'
        #continue
        #readIdentifier:{"value", "variable", "node", "reference", "index"}
    ]?;

// The body of a function: instructions between braces.
function_body ::=
    #continue
    '{' [instruction]* '}';

// Forward declaration of a function.
instruction<"declare"> ::=
    #continue
    #readIdentifier:"function"
    #readIdentifier
    ['<' #continue CONSTANT_STRING '>']?
    '(' [function_parameter [',' #continue function_parameter]*]? ')'
    ';';

// External function: binding with a C++ implementation of the function,
// defined by the user.
instruction<"external"> ::=
    #continue
    #readIdentifier:"function"
    #readIdentifier
    ['<' #continue CONSTANT_STRING '>']?
    '(' [function_parameter [',' #continue function_parameter]*]? ')'
    ';';

// The hook 'readonlyHook' is called when the tool tries to save a generated
// file but the replaced file is locked for writing. The name of the file
// is passed by value. It must return a non-empty value if the file has been
// unlocked in the body (succeeded call to the source code control system).
instruction<"readonlyHook"> ::=
    #continue
    '(' #readIdentifier ')'
    function_body;

// The hook 'writefileHook' takes three identifiers as parameters, followed
// by a function body.
instruction<"writefileHook"> ::=
    #continue
    '(' #readIdentifier ',' #readIdentifier ',' #readIdentifier ')'
    function_body;

// Returns the value of a user-defined function. It is never a node.
instruction<"return"> ::=
    #continue
    expression ';';

// Classical 'try/catch' statement. The tool puts the error message into a
// variable.
instruction<"try"> ::=
    #continue
    instruction
    "catch" '(' variable_expression ')'
    instruction;

// The statement 'finally' defines a block to execute each time the flow
// of control leaves the scope of the function, even in case of exception
// raising.
instruction<"finally"> ::=
    #continue
    instruction;

// Deprecated way to call a function, ignoring the output result.
instruction<"nop"> ::=
    #continue
    '(' function_call ')' ';';


//---------- Statement modifiers ----------

// Choose a file as the standard input (function 'inputLine()').
instruction<"file_as_standard_input"> ::=
    #continue
    '(' expression ')'
    instruction;

// Choose a string as the standard input (function 'inputLine()').
instruction<"string_as_standard_input"> ::=
    #continue
    '(' expression ')'
    instruction;

// Redirects all console outputs to a variable while running an instruction.
instruction<"quiet"> ::=
    #continue
    '(' variable_expression ')'
    instruction;

// Measures the time consumed by an instruction. Use 'getLastDelay()' to
// take the value in milliseconds after running the instruction.
instruction<"delay"> ::=
    #continue
    instruction;

// Runs an instruction under the integrated debug mode, running to the
// console.
instruction<"debug"> ::=
    #continue
    instruction;

// Runs an instruction under the integrated quantify mode, measuring time
// consuming in functions and the number of times each line of script is
// visited. An optional expression may be given between parentheses.
instruction<"quantify"> ::=
    #continue
    ['(' #continue expression ')']?
    instruction;

// The 'project' tree is the main tree of the application, a global tree.
// To change it locally, just for running an instruction, use 'new_project'.
instruction<"new_project"> ::=
    #continue
    instruction;

// In a translation or BNF script, change of the current parsed file.
instruction<"parsed_file"> ::=
    #continue
    '(' expression ')'
    instruction;

// In a translation or template-based script, change of the current
// generated file.
instruction<"generated_file"> ::=
    #continue
    '(' expression ')'
    instruction;

// In a translation or template-based script, change of the current
// output to an appending mode in a given file.
instruction<"appended_file"> ::=
    #continue
    '(' expression ')'
    instruction;

// In a translation or template-based script, change of the current
// output to a string instead of a file.
instruction<"generated_string"> ::=
    #continue
    '(' variable_expression ')'
    instruction;


//---------------------------------------------------------------------
//                         Some lexical tokens
//---------------------------------------------------------------------

// Keywords: each one is an identifier constrained to a constant value.
DEFAULT   ::= #readIdentifier:"default";
CASE      ::= #readIdentifier:"case";
START     ::= #readIdentifier:"start";
CASCADING ::= #readIdentifier:"cascading";
ELSE      ::= #readIdentifier:"else";
IN        ::= #readIdentifier:"in";
NO_CASE   ::= #readIdentifier:"no_case";
SORTED    ::= #readIdentifier:"sorted";
WHILE     ::= #readIdentifier:"while";

// A constant string, scanned by the predefined '#readCString'.
CONSTANT_STRING ::= #readCString;

// A constant char between single quotes, with an optional backslash before
// the character. Blanks and comments aren't skipped inside the quotes
// ('#!ignore').
CONSTANT_CHAR ::=
    '\''
    #!ignore
    #continue
    ['\\']? #readChar
    '\'';

// Symbols used by the production rules of an extended-BNF script.
PRULE_SYMBOL ::= "::=";
NON_TERMINAL ::= #readIdentifier;
ALTERNATION  ::= '|';
TR_BEGIN     ::= '<';
TR_END       ::= '>';


//---------------------------------------------------------------------
//                           BNF script
//---------------------------------------------------------------------

// A BNF directive starts with the symbol '#' and may be related to the
// case or to the production rule for ignoring blanks between tokens...
BNF_general_directive ::=
    '#'
    #readIdentifier:sKeyword
    BNF_general_directive<sKeyword>
    ;

// If not recognized as a BNF directive, it is a common preprocessor
// directive.
BNF_general_directive<T> ::= preprocessor<T>;

// If set, the case isn't taken into account.
BNF_general_directive<"noCase"> ::= #check(true);

// Defines the production rule for ignoring comments and blanks between
// tokens.
BNF_general_directive<"ignore"> ::=
    #continue
    PRULE_SYMBOL right_side_production_rule;

// Overload a non-terminal whose production rule has already been defined:
// either the '#ignore' clause or an ordinary production rule.
BNF_general_directive<"overload"> ::=
        '#' #continue #readIdentifier:"ignore" BNF_general_directive<"ignore">
    |
        production_rule;

// Only under the translation mode: means that the input stream is copied
// to the output stream automatically while scanning (useful for program
// transformations).
BNF_general_directive<"implicitCopy"> ::=
    [
        '('
        #continue
        #readIdentifier
        ['<' #continue CONSTANT_STRING '>']?
        ')'
    ]?;

// Only under the translation mode, chosen by default: the script specifies
// between '@' symbols (or between '%>' '<%') the text to write in the output
// stream.
BNF_general_directive<"explicitCopy"> ::= #check(true);

// Definition of a production rule that may be template, resolved/instantiated
// or not.
// A non-terminal admits parameters (passed by value, by reference, by node) and
// may return a value (see documentation).
production_rule ::=
    NON_TERMINAL
    #continue
    [
        TR_BEGIN
        #continue
        [
                #readIdentifier
            |
                CONSTANT_STRING
        ]
        TR_END
    ]?