/**
 * This file defines the grammar for takentaal.
 * It is divided into parser rules (lowercase) and lexer rules (uppercase).
 * The lexer splits an input into tokens according to the lexer rules.
 * At any point, all lexer rules are considered. If multiple rules match,
 * a lexer rule is chosen as follows:
 *  - the rule that matches the longest input is chosen
 *  - any implicit rule, e.g. 'a', is chosen
 *  - the first defined rule is chosen.
 * Since this grammar has to match unquoted texts and texts are usually longer
 * than other token matches, the TEXT rule disallows many characters as the
 * first character to start with.
 */
grammar takentaal;

// ---------------------------------------------------------------------------
// Parser rules
// ---------------------------------------------------------------------------

// A document is the version header followed by exactly one plan.
takentaal
    : header plan
    ;

// The version line. The literal is an implicit token, so it wins over TEXT
// when both match the same number of characters.
header
    : 'takentaal v0.1.0' EOL
    ;

// A plan: '#', an optional amount, a title, a free-text description and at
// least one task.
// NOTE: S is skipped by the lexer, so `S*` never consumes a token here; it is
// kept only to leave the rule shape unchanged.
plan
    : PLAN_TOKEN S* amount TEXT EOL description task+
    ;

// Zero or more lines of free text.
description
    : (TEXT EOL)*
    ;

// A task: '##', an optional amount, a title, a description and any number of
// subtasks.
task
    : TASK_TOKEN S* amount TEXT EOL description subtask*
    ;

// A subtask: a status marker ('-', '/', '*' or '!'), an optional amount, a
// title and a description.
subtask
    : SUBTASK_TOKEN S* amount TEXT EOL description
    ;

// An optional amount written as `{<integer>}`; the empty alternative makes
// the amount optional.
amount
    : START_AMOUNT S* INT END_AMOUNT
    |
    ;

// ---------------------------------------------------------------------------
// Lexer rules
// ---------------------------------------------------------------------------

PLAN_TOKEN
    : '#'
    ;

TASK_TOKEN
    : '##'
    ;

// Any of the four subtask status markers. Because this rule is defined before
// the individual SUBTASK_*_TOKEN rules and matches the same single character,
// it is the token type that is emitted for those characters.
SUBTASK_TOKEN
    : (SUBTASK_NEW_TOKEN | SUBTASK_PARTIAL_TOKEN | SUBTASK_COMPLETE_TOKEN | SUBTASK_OBSOLETE_TOKEN)
    ;

SUBTASK_NEW_TOKEN
    : '-'
    ;

SUBTASK_PARTIAL_TOKEN
    : '/'
    ;

SUBTASK_COMPLETE_TOKEN
    : '*'
    ;

SUBTASK_OBSOLETE_TOKEN
    : '!'
    ;

// A single space between tokens is discarded.
S
    : ' ' -> skip
    ;

// Matches the same input as S; since S is defined first, S takes precedence.
WS
    : [ ] -> skip
    ;

// End of line: optional trailing spaces followed by one or more line breaks.
// Both '\n' and '\r\n' line endings are accepted.
EOL
    : ' '* ('\r'? '\n')+
    ;

INT
    : DIGIT+
    ;

fragment DIGIT
    : [0-9]
    ;

START_AMOUNT
    : '{'
    ;

END_AMOUNT
    : '}'
    ;

// all special characters, including ' ' and digits are subtracted from the printable character range
// '!' '#' '-' '/' '*' '{' '}'
fragment STARTCHAR
    : ["$-)+-,.:-z|~\u00A0-\u33FF]
    ;

// A text should not end with a space, so the ENDCHAR omits the space.
// The range starts at '!' so that a text may end with an exclamation mark.
fragment ENDCHAR
    : [!-~\u00A0-\u33FF]
    ;

fragment CHAR
    : [ -~\u00A0-\u33FF] // ASCII and UNICODE
    ;

// A text cannot start with a special character or has to be placed in quotes
TEXT
    : STARTCHAR (CHAR* ENDCHAR)?
    ;