/*description:
{
    Here is the complete grammar of the CodeWorker's scripting language.

    It is expressed in the extended-BNF dialect of CodeWorker, so has the
    advantage of running under \CodeWorker\ for scanning scripts, and has the
    original feature to be auto-descriptive.
}
*/

/* "CodeWorker":    a scripting language for parsing and generating text.

Copyright (C) 1996-1997, 1999-2003 Cédric Lemaire

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

To contact the author: codeworker@free.fr
*/

// Defines how to ignore insignificant characters between tokens: blanks,
// tabulations, line ends, C-like comments (delegated to
// 'ignoreEmbeddedComment' once "/*" has been read) and C++-like line
// comments.
// A line comment stops at the next '\n' or, when there is none, at the end
// of the input: a script whose very last line is a "//" comment without a
// trailing newline must still be skipped entirely.
#ignore    ::= [' ' | '\t' | '\r' | '\n' | "/*" ignoreEmbeddedComment | "//"->['\n' | #empty]]*;

// Head of the grammar (the first production rule always plays that role).
// 'this' points to the context variable, which tells what kind of script
// is about to be scanned:
//   - "procedural": the backbone of the scripting language,
//   - "extended-BNF": BNF scripts, translation scripts included,
//   - "template-based": the server-page notation for generation patterns,
//   - "" (empty string): the kind is unknown and every kind will be tried.
translation_unit ::=
        script<this>;

// The type of the script is unknown: the production rule tries the 3
// types in turn (procedural, then extended-BNF, then template-based).
// This rule is the least declarative one. Before each attempt, it stores
// the candidate type into 'this'; each attempt is enclosed between '#try'
// and '#catch', so that the error message is kept in a variable and the
// remaining alternatives can still be looked at.
// Note the possible use of a C++-template syntax for non-terminal symbols.
script<"">    ::=
        #try
            // '=>' is an escape mode to execute a procedural instruction
            // or block.
            => set this = "procedural";
            script<"procedural">
        // We don't want to stop on error if the script wasn't recognized
        // as a common one: the message is stored into 'sError1'.
        #catch(sError1)
    |
        #try
            => set this = "extended-BNF";
            script<"extended-BNF">
        // Same principle: the message is stored into 'sError2'.
        #catch(sError2)
    |
        #try
            => set this = "template-based";
            script<"template-based">
        // Same principle: the message is stored into 'sError3'.
        #catch(sError3)
    |
        // If none of the 3 types, 'this' is restored to the empty string
        // and a single error gathering the 3 messages is raised.
        => set this = "";
        => error("Not recognized as a 'CodeWorker' script:\n" +
                 "  - procedural script:\n" + sError1 +
                 "  - extended-BNF script:\n" + sError2 +
                 "  - template-based script:\n" + sError3);
    ;

// script<"procedural"> scans a common script, such as a leader script:
// neither a BNF script nor a template-based one.
// Directives in use:
//   - '#ignore': blanks and C++ comments are skipped between symbols,
//   - '#continue': the rest of the sequence must be valid, otherwise the
//     scanner raises a syntax error,
//   - '#empty': the end of file is expected; because of '#continue', a
//     syntax error is raised if it isn't reached.
script<"procedural"> ::=
        #ignore
        #continue
        [instruction]+
        #empty;

// The well-named non terminal script<"extended-BNF"> scans an extended-BNF script.
// It expects a non-empty set of:
//   - BNF directives (case sensitive or not, ...),
//   - functions declaration/definition,
//   - production rules,
// up to the end of the input ('#empty').
// An error is raised if a syntax error is encountered (use of '#continue').
script<"extended-BNF">    ::=
        #ignore
        #continue
        [
                BNF_general_directive
            |
                // '#readIdentifier' is a predefined non terminal that scans
                // a C-like identifier.
                // - A:{"s1", ..., "sN"} means that the token must be worth
                //   a constant string of the set,
                // - A:var means that the token value is assigned to the
                //   variable 'var'. If the variable doesn't exist yet, it is
                //   declared in the local scope,
                // - A:{"s1", ..., "sN"}:var means that the token must belong
                //   to the set and that the value is assigned to the variable.
                // Here, the keyword selects the instantiation of
                // 'instruction' that scans the function declaration or
                // definition.
                #readIdentifier:{"function", "declare", "external"}:sKeyword
                #continue
                instruction<sKeyword>
            |
                production_rule
        ]+
        #empty;

// The non terminal script<"template-based"> scans a template-based script.
// Procedural instructions or expressions are embedded between @ symbols
// or between <% and %>.
// An error is raised if a syntax error is encountered (use of '#continue').
// Note that '#ignore' is only activated once an opening '@' or "<%" has
// been reached: what stands before it is jumped over as it is.
script<"template-based">    ::=
        #continue
        [
            // Jump to the next opening '@' or "<%".
            ->['@' | "<%"]
            #continue
            #ignore
            [
                    // Either an expression up to the closing symbol (or the
                    // end of the input) ...
                    expression
                    ['@' | "%>" | #empty]
                |
                    // ... or a possibly empty list of instructions up to the
                    // closing symbol (or the end of the input).
                    [instruction]*
                    ['@' | "%>" | #empty]
            ]
        ]+;


// Called by '#ignore', once an opening "/*" has been read.
// It jumps to the first of the following patterns:
//   - a closing "*/",
//   - another "/*", after which the rule calls itself,
//   - a "//" line comment, jumped up to its '\n', after which the rule
//     calls itself.
ignoreEmbeddedComment ::=
        ->[
                "*/"
            |
                "/*" ignoreEmbeddedComment
            |
                "//"->'\n' ignoreEmbeddedComment
        ];

//-------------------------------------------------------------
//                          The expressions
//-------------------------------------------------------------

// Default form of an expression: not arithmetic, it manipulates strings
// and returns a string. It is the generic boolean expression instantiated
// with 'false'.
expression ::=
        boolean_expression<false>;

// A condition for 'while', 'if', ... statements.
// This rule isn't a template: no template variable is declared in its
// scope, so the generic form is instantiated explicitly with 'false'
// (string mode), the same way 'expression' does.
boolean_expression    ::= boolean_expression<false>;

// Concatenation of string expressions: here, '+' stands for the
// concatenation.
concatenation_expression ::=
        comparison_member_expression<false>;

// Template form of an expression, driven by the template variable
// 'bNumeric':
//   - instantiated with false (a keyword of the language that means an
//     empty string), it handles a string expression,
//   - instantiated with true (a keyword that means "true"), it handles an
//     arithmetic expression.
// The escape mode '$' of 'literal_expression<bNumeric>' shows how to swap
// to the arithmetic mode.
expression<bNumeric> ::=
        boolean_expression<bNumeric>;

// Generic boolean expression, shared by the arithmetic and the string
// modes: ternary expressions chained by boolean operators. Once an operator
// has been read, the right member is mandatory ('#continue').
boolean_expression<bNumeric> ::=
        ternary_expression<bNumeric>
        [
            boolean_operator
            #continue
            ternary_expression<bNumeric>
        ]*;

// The doubled operators are tried before the single-character ones.
boolean_operator ::=
        "&&" | "||" | "^^"
    |
        '&' | '|' | '^';

// The C-like ternary operator '?' ':'. The part starting at '?' is
// optional but, once '?' has been read, both branches are mandatory.
ternary_expression<bNumeric> ::=
        comparison_expression<bNumeric>
        [
            '?'
            #continue
            expression<bNumeric>
            ':'
            expression<bNumeric>
        ]?;

// Generic comparison, for both the arithmetic and the string modes:
// members chained by comparison operators.
comparison_expression<bNumeric> ::=
        comparison_member_expression<bNumeric>
        [
            comparison_operator
            #continue
            comparison_member_expression<bNumeric>
        ]*;

// The two-character operators are tried before the one-character ones.
comparison_operator ::=
        "<=" | "<>" | ">=" | "!=" | "=="
    |
        '<' | '=' | '>';

// A member of a comparison returns either a numeric (stored as a string)
// or a string. The instantiation 'comparison_member_expression<true>'
// is the arithmetic one: it recognizes the arithmetic operators, so '+'
// means addition instead of concatenation.
comparison_member_expression<true> ::=
        shift_expression
        [
            sum_operator
            #continue
            shift_expression
        ]*;

sum_operator ::=
        '+' | '-';

// The binary shift operators, written as in C/C++/Java ...
shift_expression ::=
        factor_expression
        [
            shift_operator
            #continue
            factor_expression
        ]*;

shift_operator ::=
        "<<" | ">>";

// The multiplicative level: arithmetic literals chained by '*', '/' or '%'.
factor_expression ::=
        literal_expression<true>
        [
            factor_operator
            #continue
            literal_expression<true>
        ]*;

factor_operator ::=
        '*' | '/' | '%';


// The instantiation 'comparison_member_expression<false>' is the string
// one: '+' means concatenation and the arithmetic operators such as '-'
// and '%' aren't available.
comparison_member_expression<false> ::=
        literal_expression<false>
        [
            '+'
            #continue
            literal_expression<false>
        ]*;


// The generic non terminal of a literal. The alternatives are tried in
// the order of their declaration:
//   - string between double quotes,
//   - expression between parentheses,
//   - arithmetic expression between '$' (cannot be reentrant),
//   - bitwise negation,
//   - constant char (interpreted as a string in CodeWorker),
//   - boolean negation,
//   - number (interpreted as a string in CodeWorker),
//   - predefined constants 'true' (= "true") and 'false' (= ""),
//   - function call,
//   - variable expression, perhaps followed by a method call.
literal_expression<bNumeric>    ::=
        CONSTANT_STRING
    |
        // The expression between parentheses keeps the current mode.
        '(' #continue expression<bNumeric> ')'
    |
        // Escape to the arithmetic mode; the check refuses it when the
        // current mode is already arithmetic.
        '$' #continue #check(!bNumeric) expression<true> '$'
    |
        // The bitwise negation is accepted in arithmetic mode only.
        '~' #continue #check(bNumeric) literal_expression<true>
    |
        CONSTANT_CHAR
    |
        '!' #continue literal_expression<bNumeric>
    |
        #readNumeric // scans a number
    |
        #readIdentifier:{"true", "false"}
    |
        function_call
    |
        variable_expression ['.' #continue method_call]?
        ;

// A variable is either:
//   - an identifier, followed by its right-side (see the template form
//     'variable_expression<sIdentifier>'),
//   - the directive '#evaluateVariable' applied to an expression between
//     parentheses.
variable_expression ::=
        #readIdentifier:sIdentifier
        variable_expression<sIdentifier>
    |
        '#'
        #continue
        "evaluateVariable"
        '(' expression ')';

// The right-side of a variable. '#!ignore' in the non-terminal declaration
// part of the production rule means that neither blanks nor C++-like comments
// must be scanned before calling the non-terminal. Because of an ambiguity
// on '#' and '[' with the extended-BNF syntax, whitespaces aren't allowed
// before them in a variable expression, while a BNF directive (#...) or a
// repeatable sequence ([...]...) must have at least a blank or comment before
// them.
//
// The right-side is a possibly empty sequence of steps, each of which:
//   - points to a subnode with '.' as in C/C++/Java... for accessing
//     the attributes of a structure,
//   - points to an item of the current node's array,
//   - accesses the first/last item of the array or the parent's node,
//   - accesses the nth item of the array (starting at 0).
variable_expression<sIdentifier> : #!ignore    ::=
        [
                // Attribute access. The negative lookahead rejects the
                // step when what follows looks like a call, meaning an
                // optional '<' ... '>' and then '('.
                #ignore '.' #readIdentifier
                ![['<' concatenation_expression '>']? '(']
            |
                // Item of the array; the ignore mode is switched on again
                // only after the '['.
                '[' #ignore #continue expression ']'
            |
                // Accessors introduced by '#'.
                '#'
                [
                        #readIdentifier:{"front", "back", "parent"}
                    |
                        '[' #ignore #continue expression ']'
                ]
        ]*
        ;

// A method call consists of calling a function where (generally) the first
// parameter is provided as an expression on the left-side:
//    sText.findString('/');
// calls the function
//    findString(sText, '/');
// For some predefined functions, the expression doesn't represent
// the first parameter:
//    list.findElement("BNF");
// calls the function
//    findElement("BNF", list);
// where 'list' occupied the second position.
//
// The name of the method selects the instantiation of
// 'predefined_method_call'. If the name isn't one of a predefined method,
// the call is scanned as a user-defined one: 'user_method_call' is a plain
// (non template) non-terminal, so it is called without any template
// argument, the same way 'function_call' calls 'user_function_call'.
method_call    ::=
        #readIdentifier:sMethodName
        [
                predefined_method_call<sMethodName>
            |
                user_method_call
        ];

// Arguments of a user-defined method: an optional template key between
// '<' and '>', then a possibly empty list of expressions separated by
// commas, between parentheses.
user_method_call    ::=    ['<' concatenation_expression '>']? '(' #continue [expression [',' #continue expression]*]? ')';

// Call of a function: the name is read first, then the call is scanned
// either as the one of a predefined function (instantiation selected by
// the name) or as the one of a user-defined function.
function_call ::=
        #readIdentifier:sFunctionName
        [predefined_function_call<sFunctionName> | user_function_call];

// Arguments of a user-defined function: an optional template key between
// '<' and '>', then a possibly empty list of expressions separated by
// commas, between parentheses.
user_function_call ::=
        ['<' concatenation_expression '>']?
        '('
        #continue
        [
            expression
            [',' #continue expression]*
        ]?
        ')';


//-------------------------------------------------------------
//                          The instructions
//-------------------------------------------------------------

// The non-terminal of an instruction:
//   - a block of instructions,
//   - a simple statement,
//   - a call to a predefined function,
//   - a call to a predefined procedure,
//   - a call to a user function,
//   - a preprocessor directive,
//   - a server page's raw text (between @ or %> <%).
instruction    ::=
        '{' #continue [instruction]* '}'
    |
        // The leading identifier selects the statement; if it isn't the
        // keyword of a statement, the generic form 'instruction<sIdentifier>'
        // is tried, then the different kinds of call.
        #readIdentifier:sKeyword
        [
                instruction<sKeyword>
            |
                predefined_function_call<sKeyword> ';'
            |
                predefined_procedure_call<sKeyword> ';'
            |
                user_function_call ';'
        ]
    |
        '#'
        #readIdentifier:sKeyword
        preprocessor<sKeyword>
    |
        // Raw text of a server page: refused in a procedural script.
        // It starts at a closing '@' or "%>" and is jumped over, with no
        // blanks ignored, up to the next opening '@' or "<%" (or up to the
        // end of the input). An expression may follow, provided that a
        // closing symbol or the end of the input stands just after it.
        #check(this != "procedural")
        ['@' | "%>"]
        #!ignore
        #continue ->['@' | "<%" | #empty]
        #ignore
        [expression ![!'@' !"%>" !#empty]]?
        ;

// 'preprocessor<"include">' scans the inclusion of a script file: the
// keyword must be followed by a constant string.
preprocessor<"include"> ::=
        #continue
        CONSTANT_STRING;

// Generic form of 'instruction<...>', called when the keyword wasn't
// recognized as a statement: it might be an assignment, where the
// identifier starts the assigned variable.
instruction<sIdentifier> ::=
        variable_expression<sIdentifier>
        ['=' | "+="]
        #continue
        expression
        ';';

//------------------ Some classical statements ------------------

// 'if': a condition, an instruction and an optional 'else' branch.
instruction<"if"> ::=
        #continue
        boolean_expression
        instruction
        [ELSE #continue instruction]?;

// 'do': an instruction, then 'while' and the condition, ended by ';'.
instruction<"do"> ::=
        #continue
        instruction
        WHILE
        boolean_expression
        ';';

// 'while': a condition followed by the instruction to repeat.
instruction<"while"> ::=
        #continue
        boolean_expression
        instruction;

// The 'switch' statement works on strings. Besides 'case' and 'default',
// the 'start' label takes the flow of control if the controlled sequence
// starts with the corresponding constant expression.
instruction<"switch"> ::=
        #continue
        '(' expression ')'
        switch_body;

// Body of a 'switch': between braces, a possibly empty list of labels,
// each followed by ':' and by a possibly empty list of instructions.
switch_body ::=
        '{'
        #continue
        [
            [DEFAULT | [CASE | START] #continue CONSTANT_STRING]
            ':'
            [instruction]*
        ]*
        '}';


//------------------ Some assignment operators ------------------

// 'local' declares a local variable on the stack, as a tree. The scope
// manages its lifetime. A value may be assigned to the variable.
instruction<"local"> ::=
        #continue
        variable_expression
        ['=' #continue expression]?
        ';';

// 'global' declares a global variable, visible everywhere. A value may be
// assigned to the variable.
instruction<"global"> ::=
        #continue
        variable_expression
        ['=' #continue expression]?
        ';';

// 'localref' declares a local variable and assigns a reference to another
// node:
//     localref A = B;
//   is the equivalent of:
//     local A;
//     ref A = B;
instruction<"localref"> ::=
        #continue
        variable_expression
        '='
        variable_expression
        ';';

// 'setall' copies a node to another integrally, after cleaning the
// destination node.
instruction<"setall"> ::=
        #continue
        variable_expression
        '='
        variable_expression
        ';';

// 'merge' merges a node to another integrally.
instruction<"merge"> ::=
        #continue
        variable_expression
        '='
        variable_expression
        ';';

// 'set' is the classical assignment of a value to a node. If the node
// doesn't exist yet, a warning is displayed but the node is created and
// the value assigned. It is better to use 'insert' to create a node.
instruction<"set"> ::=
        #continue
        variable_expression
        ["+=" | '=']
        expression
        ';';

// 'insert' assigns a value to a node, which is created if it doesn't exist
// yet. If nothing has to be assigned, the node is just created.
instruction<"insert"> ::=
        #continue
        variable_expression
        [
            ["+=" | '=']
            #continue
            expression
        ]?
        ';';

// 'ref' assigns a reference to another node.
instruction<"ref"> ::=
        #continue
        variable_expression
        '='
        variable_expression
        ';';

// 'pushItem' adds a new item in an array. The key of the item is worth its
// position in the array (the last one), starting at 0.
instruction<"pushItem"> ::=
        #continue
        variable_expression
        ['=' #continue expression]?
        ';';

// The statement 'foreach' iterates the items of an array.
// Options may stand between 'in' and the array:
//   - 'sorted' sorts the items before iterating; with 'no_case', the case
//     isn't taken into account,
//   - 'cascading' propagates the iteration on branches that have the same
//     name as the array; it may be followed by 'first' or 'last'.
//     Example: 'foreach i in cascading a.b.c ...' will propagate the
//     'foreach' on 'i.c' and so on recursively.
instruction<"foreach"> ::=
        #continue
        #readIdentifier
        IN
        [
                SORTED [NO_CASE]?
            |
                CASCADING [#readIdentifier:{"first", "last"}]?
        ]*
        variable_expression
        instruction;

// 'continue' and 'break' have the same meaning as in C/C++/Java: the
// keyword is just followed by ';'.
instruction<"continue"> ::=
        #continue
        ';';

instruction<"break"> ::=
        #continue
        ';';

// The statement 'forfile' browses a directory and iterates all files that
// match a pattern, given as an expression. The search is recursive on
// directories if 'cascading' is chosen.
instruction<"forfile"> ::=
        #continue
        #readIdentifier
        IN
        [
                SORTED [NO_CASE]?
            |
                CASCADING [#readIdentifier:{"first", "last"}]?
        ]*
        expression
        instruction;

// The statement 'select' crosscuts all the tree nodes that match a pattern
// of branch (see 'motif_expression'), in the spirit of XPath (XSL).
instruction<"select"> ::=
        #continue
        #readIdentifier
        IN [SORTED]?
        motif_expression
        instruction;

// The non-terminal 'motif_expression' defines a kind of XPath expression
// to apply on a subtree.
// Top level: a motif between parentheses or an 'and' expression, then any
// number of 'and' expressions introduced by "||" or '|'.
motif_expression ::=
        ['(' #continue motif_expression ')' | motif_and_expression]
        [["||" | '|'] #continue motif_and_expression]*;

// Concatenations chained by "&&" or '&'.
motif_and_expression ::=
        motif_concat_expression
        [
            ["&&" | '&']
            #continue
            motif_concat_expression
        ]*;

// Paths chained by '+'.
motif_concat_expression ::=
        motif_path_expression
        [
            '+'
            #continue
            motif_path_expression
        ]*;

// A path: steps separated by '.', or by "..." (tried first).
motif_path_expression ::=
        motif_step_expression
        [
                "..." #continue motif_ellipsis_expression
            |
                '.' #continue motif_step_expression
        ]*;

// What follows "..." is a step.
motif_ellipsis_expression ::=
        motif_step_expression;

// A step: '*' or an identifier, followed by any number of brackets that
// enclose an optional expression.
motif_step_expression ::=
        #continue
        ['*' | #readIdentifier]
        [
            '['
            [expression]?
            ']'
        ]*;


//---------- Declaration / definition of user-defined functions ----------

// The keyword 'function' starts the definition of a user-defined function:
// its name, an optional template key (a constant string between '<' and
// '>', that instantiates a kind of template form of the function), the
// parameters between parentheses and the body.
instruction<"function"> ::=
        #continue
        #readIdentifier
        ['<' #continue CONSTANT_STRING '>']?
        '('
        [
            function_parameter
            [',' #continue function_parameter]*
        ]?
        ')'
        function_body;

// A parameter: its name and, optionally, ':' followed by the passing mode.
function_parameter ::=
        #readIdentifier
        [
            ':'
            #continue
            #readIdentifier:{"value", "variable", "node", "reference", "index"}
        ]?;

// The body: a possibly empty list of instructions between braces.
function_body ::=
        #continue
        '{'
        [instruction]*
        '}';

// 'declare function': forward declaration of a function. Same prototype
// as for a definition, but ended by ';' instead of a body.
instruction<"declare"> ::=
        #continue
        #readIdentifier:"function"
        #readIdentifier
        ['<' #continue CONSTANT_STRING '>']?
        '('
        [
            function_parameter
            [',' #continue function_parameter]*
        ]?
        ')'
        ';';

// 'external function': binding with a C++ implementation of the function,
// defined by the user. Only the prototype is given, ended by ';'.
instruction<"external"> ::=
        #continue
        #readIdentifier:"function"
        #readIdentifier
        ['<' #continue CONSTANT_STRING '>']?
        '('
        [
            function_parameter
            [',' #continue function_parameter]*
        ]?
        ')'
        ';';

// The hook 'readonlyHook' is called when the tool tries to save a
// generated file while the file to replace is locked for writing. The name
// of the file is passed by value. The hook must return a non-empty value
// if the file has been unlocked in the body (succeeded call to the source
// code control system).
instruction<"readonlyHook"> ::=
        #continue
        '(' #readIdentifier ')'
        function_body;

// The hook 'writefileHook' is declared with three parameter names,
// followed by a body.
// NOTE(review): the meaning of the three parameters isn't described in
// this grammar; presumably they relate to the file about to be written.
// To be confirmed against the reference manual.
instruction<"writefileHook"> ::=
        #continue
        '('
        #readIdentifier ','
        #readIdentifier ','
        #readIdentifier
        ')'
        function_body;

// 'return' gives the value of a user-defined function. It is never a node.
instruction<"return"> ::=
        #continue
        expression
        ';';

// Classical 'try/catch' statement. The tool puts the error message into
// the variable given between parentheses after 'catch'.
instruction<"try"> ::=
        #continue
        instruction
        "catch"
        '(' variable_expression ')'
        instruction;

// The statement 'finally' defines a block to execute each time the flow of
// control leaves the scope of the function, even in case of exception
// raising.
instruction<"finally"> ::=
        #continue
        instruction;

// 'nop': deprecated way to call a function, ignoring the output result.
instruction<"nop"> ::=
        #continue
        '(' function_call ')'
        ';';


//---------- Statement modifiers ----------
// A statement modifier stands in front of the instruction it applies to.

// Chooses a file as the standard input (function 'inputLine()').
instruction<"file_as_standard_input"> ::=
        #continue
        '(' expression ')'
        instruction;

// Chooses a string as the standard input (function 'inputLine()').
instruction<"string_as_standard_input"> ::=
        #continue
        '(' expression ')'
        instruction;

// Redirects all console outputs to a variable while running an
// instruction.
instruction<"quiet"> ::=
        #continue
        '(' variable_expression ')'
        instruction;

// Measures the time consumed by an instruction. Use 'getLastDelay()' to
// take the value in milliseconds after running the instruction.
instruction<"delay"> ::=
        #continue
        instruction;

// Runs an instruction under the integrated debug mode, running to the
// console.
instruction<"debug"> ::=
        #continue
        instruction;

// Runs an instruction under the integrated quantify mode, which measures
// the time consumed in functions and the number of times each line of
// script is visited. An expression between parentheses may be given.
instruction<"quantify"> ::=
        #continue
        ['(' #continue expression ')']?
        instruction;

// The 'project' tree is the main tree of the application, a global tree.
// 'new_project' changes it locally, just for running an instruction.
instruction<"new_project"> ::=
        #continue
        instruction;

// In a translation or BNF script, changes the current parsed file.
instruction<"parsed_file"> ::=
        #continue
        '(' expression ')'
        instruction;

// In a translation or template-based script, changes the current generated
// file.
instruction<"generated_file"> ::=
        #continue
        '(' expression ')'
        instruction;

// In a translation or template-based script, changes the current output to
// an appending mode in a given file.
instruction<"appended_file"> ::=
        #continue
        '(' expression ')'
        instruction;

// In a translation or template-based script, changes the current output to
// a string instead of a file.
instruction<"generated_string"> ::=
        #continue
        '(' variable_expression ')'
        instruction;


//---------------------------------------------------------------------
//                        Some lexical tokens
//---------------------------------------------------------------------

// Keywords: a C-like identifier that must be worth the constant string.
DEFAULT ::= #readIdentifier:"default";
CASE ::= #readIdentifier:"case";
START ::= #readIdentifier:"start";
CASCADING ::= #readIdentifier:"cascading";
ELSE ::= #readIdentifier:"else";
IN ::= #readIdentifier:"in";
NO_CASE ::= #readIdentifier:"no_case";
SORTED ::= #readIdentifier:"sorted";
WHILE ::= #readIdentifier:"while";

// A constant string, scanned by the predefined '#readCString'.
CONSTANT_STRING ::=
        #readCString;

// A constant char between single quotes: no blanks are ignored after the
// opening quote, and the character may be preceded by a backslash.
CONSTANT_CHAR ::=
        '\''
        #!ignore
        #continue
        ['\\']?
        #readChar
        '\'';

// Symbols used for scanning production rules.
PRULE_SYMBOL ::= "::=";
NON_TERMINAL ::= #readIdentifier;
ALTERNATION ::= '|';
TR_BEGIN ::= '<';
TR_END ::= '>';

//---------------------------------------------------------------------
//                                BNF script
//---------------------------------------------------------------------

// A BNF directive starts with the symbol '#', followed by a keyword that
// selects the instantiation of 'BNF_general_directive<...>' to apply. It
// may be related to the case or to the production rule for ignoring blanks
// between tokens...
BNF_general_directive ::=
        '#' #readIdentifier:sKeyword
        BNF_general_directive<sKeyword>;

// Generic form: if the keyword isn't recognized as the one of a BNF
// directive, it is handled as a common preprocessor directive.
BNF_general_directive<T> ::=
        preprocessor<T>;

// '#noCase': if set, the case isn't taken into account. Nothing follows
// the keyword.
BNF_general_directive<"noCase"> ::=
        #check(true);

// '#ignore': defines the production rule for ignoring comments and blanks
// between tokens.
BNF_general_directive<"ignore"> ::=
        #continue
        PRULE_SYMBOL
        right_side_production_rule;

// '#overload': overloads a non-terminal whose production rule has already
// been defined. What follows is either the directive '#ignore' or a
// production rule.
BNF_general_directive<"overload"> ::=
        '#'
        #continue
        #readIdentifier:"ignore"
        BNF_general_directive<"ignore">
    |
        production_rule;

// '#implicitCopy', only under the translation mode: the input stream is
// copied to the output stream automatically while scanning (useful for
// program transformations). An identifier may be given between
// parentheses, optionally followed by a constant string between '<' and
// '>'.
BNF_general_directive<"implicitCopy"> ::=
        [
            '('
            #continue
            #readIdentifier
            ['<' #continue CONSTANT_STRING '>']?
            ')'
        ]?;

// '#explicitCopy', only under the translation mode and chosen by default:
// the script specifies between '@' symbols (or between '%>' '<%') the text
// to write in the output stream. Nothing follows the keyword.
BNF_general_directive<"explicitCopy"> ::=
        #check(true);

// Definition of a production rule that may be template, resolved/instantiated
// or not.
// A non-terminal admits parameters (passed by value, by reference, by node) and
// may return a value (see documentation).
production_rule    ::=
        NON_TERMINAL
        #continue
        [
            TR_BEGIN
            #continue
            [
                    #readIdentifier
                |
                    CONSTANT_STRING
            ]
            TR_END
        ]?