/**
 * This file defines the grammar for takentaal.
 * It is divided into parser rules (lowercase) and lexer rules (uppercase).
 * The lexer splits an input into tokens according to the lexer rules.
 * At any point, all lexer rules are considered. If multiple rules match,
 * a lexer rule is chosen as follows:
 * - the rule that matches the longest input is chosen
 * - any implicit rule, e.g. 'a', is chosen
 * - the first defined rule is chosen.
 * Since this grammar has to match unquoted texts, and texts are usually longer
 * than other token matches, texts are split into characters so that they have
 * a lower ranking.
 */
grammar takentaal;

// ---------------------------------------------------------------------------
// Parser rules
// ---------------------------------------------------------------------------

// Entry point: the version header, one plan, and optional trailing line ends.
takentaal
    : header plan EOL*
    ;

// The version marker is matched as a single implicit token.
header
    : 'takentaal v0.1.0'
    ;

// Any implicit and explicit lexer token that may appear in a text should be listed
// in this definition.
text
    : (INT | '{' | '}' | S | CHAR | WORD)+
    ;

// A plan: marker, optional amount, title text, description lines, and at least one task.
plan
    : PLAN_TOKEN S* amount text description task+
    ;

// Zero or more continuation lines, each introduced by an EOL.
description
    : (EOL text)*
    ;

// A task: marker, optional amount, title text, description lines, and any number of subtasks.
task
    : TASK_TOKEN S* amount text description subtask*
    ;

// A subtask: marker, optional amount, title text, and description lines.
subtask
    : SUBTASK_TOKEN S* amount text description
    ;

// An integer in braces; the empty second alternative makes the amount optional.
amount
    : '{' S* INT S* '}'
    |
    ;

// ---------------------------------------------------------------------------
// Lexer rules
// ---------------------------------------------------------------------------

// The markers below include the preceding line end(s), so a marker character
// is only recognised as a marker directly after an EOL.
PLAN_TOKEN
    : EOL+ '#'
    ;

TASK_TOKEN
    : EOL+ '##'
    ;

// Because SUBTASK_TOKEN is defined before the four specific subtask rules and
// matches exactly the same input, the lexer always emits SUBTASK_TOKEN; the
// specific rules below only act as named alternatives of it.
SUBTASK_TOKEN
    : (SUBTASK_NEW_TOKEN | SUBTASK_PARTIAL_TOKEN | SUBTASK_COMPLETE_TOKEN | SUBTASK_OBSOLETE_TOKEN)
    ;

SUBTASK_NEW_TOKEN
    : EOL+ '-'
    ;

SUBTASK_PARTIAL_TOKEN
    : EOL+ '/'
    ;

SUBTASK_COMPLETE_TOKEN
    : EOL+ '*'
    ;

SUBTASK_OBSOLETE_TOKEN
    : EOL+ '!'
    ;

// A single space; kept as a token because spaces are significant inside a text.
S
    : ' '
    ;

// NOTE(review): as written this matches the same single space as S, which is
// defined earlier and therefore wins the tie, so this rule looks unreachable.
// Possibly the set was meant to hold other whitespace (e.g. a tab) - confirm.
WS
    : [ ] -> skip
    ;

// Optional trailing spaces followed by one or more newlines.
EOL
    : ' '* '\n'+
    ;

INT
    : DIGIT+
    ;

fragment DIGIT
    : [0-9]
    ;

// Match printable characters, except space which is covered by S
CHAR
    : [!-~\u00A0-\u33FF] // ASCII and UNICODE
    ;

// This is a performance improvement that groups chars that do not have a special meaning
WORD
    : [A-Za-z\u00A0-\u33FF]+
    ;