/**
 * This file defines the grammar for takentaal.
 * It is divided into parser rules (lowercase) and lexer rules (uppercase).
 * The lexer splits the input into tokens according to the lexer rules.
 * At any point, all lexer rules are considered. If multiple rules match,
 * a lexer rule is chosen as follows:
 *  - the rule that matches the longest input is chosen
 *  - any implicit rule, e.g. 'a', is chosen
 *  - the first defined rule is chosen.
 * Since this grammar has to match unquoted texts, and texts are usually longer
 * than other token matches, texts are split into small tokens (single
 * characters and words) so that they rank lower than the other tokens.
 */

grammar takentaal;

// Entry rule: the version header, one plan, optional trailing line breaks,
// and then the end of the input.
// EOF is required so that the parser reports any input it cannot match after
// the plan, instead of stopping there and silently ignoring the remainder.
takentaal
    : header
      plan
      EOL*
      EOF
    ;

// Version header: a fixed literal naming the language and its version.
header : 'takentaal v0.1.0' ;

// Free text on a single line. Every token the lexer can produce inside a text
// must be listed here: explicit tokens as well as implicit (quoted literal)
// ones.
// The header literal is listed because the lexer prefers the longest match:
// a text that mentions 'takentaal v0.1.0' is tokenized as that implicit token
// rather than as WORD/S/CHAR/INT tokens, and would otherwise fail to parse.
text
    : ( INT
      | '{'
      | '}'
      | S
      | CHAR
      | WORD
      | 'takentaal v0.1.0'
      )+
    ;

// A plan: the plan marker, optional spaces, an optional amount and the title
// text, followed by description lines and at least one task.
plan
    : PLAN_TOKEN S* amount text description task+
    ;

// Zero or more description lines; each one is a line break followed by text.
description
    : ( EOL text )*
    ;

// A task: the task marker, optional spaces, an optional amount and the title
// text, followed by description lines and any number of subtasks.
task
    : TASK_TOKEN S* amount text description subtask*
    ;

// A subtask: the subtask marker, optional spaces, an optional amount and the
// title text, followed by description lines.
subtask
    : SUBTASK_TOKEN S* amount text description
    ;

// Optional amount: an integer wrapped in braces, with optional spaces between
// the braces and the digits. The rule may also match nothing at all.
amount
    : ( '{' S* INT S* '}' )?
    ;

// Plan marker: one or more line breaks followed by '#'.
PLAN_TOKEN : EOL+ '#' ;

// Task marker: one or more line breaks followed by '##'.
TASK_TOKEN : EOL+ '##' ;

// Subtask marker: line breaks followed by one of the status characters
// '-', '/', '*' or '!'.
// NOTE(review): the four rules referenced here are not fragments and are
// defined after this rule, so on an equal-length match this rule is chosen and
// the lexer only ever emits SUBTASK_TOKEN. Confirm whether the referenced
// rules were meant to be fragments.
SUBTASK_TOKEN
    : SUBTASK_NEW_TOKEN
    | SUBTASK_PARTIAL_TOKEN
    | SUBTASK_COMPLETE_TOKEN
    | SUBTASK_OBSOLETE_TOKEN
    ;

// One or more line breaks followed by '-' (the "new" status, going by the rule name).
SUBTASK_NEW_TOKEN : EOL+ '-' ;

// One or more line breaks followed by '/' (the "partial" status, going by the rule name).
SUBTASK_PARTIAL_TOKEN : EOL+ '/' ;

// One or more line breaks followed by '*' (the "complete" status, going by the rule name).
SUBTASK_COMPLETE_TOKEN : EOL+ '*' ;

// One or more line breaks followed by '!' (the "obsolete" status, going by the rule name).
SUBTASK_OBSOLETE_TOKEN : EOL+ '!' ;

// A single space. Spaces are emitted as tokens because they can be part of a
// text.
S : ' ' ;
    

// Skips a single space.
// NOTE(review): rule S matches the same single character and is defined
// earlier, so this rule appears never to be chosen. Confirm whether it can
// be removed.
WS : [ ] -> skip ;

// A line break: optional trailing spaces followed by one or more newlines.
// Each newline may be preceded by a carriage return so that input with
// Windows (CRLF) line endings tokenizes cleanly; '\r' is not matched by any
// other lexer rule and would otherwise cause a token recognition error.
EOL
    : ' '* ( '\r'? '\n' )+
    ;

// An unsigned integer: one or more decimal digits.
INT : DIGIT+ ;

// A single decimal digit; a fragment, so it never becomes a token itself.
fragment DIGIT : '0'..'9' ;

// A single character of text. Space is deliberately excluded; it is covered
// by S.
CHAR
    : '!'..'~'              // ASCII from '!' through '~'
    | '\u00A0'..'\u33FF'    // Unicode code points U+00A0 through U+33FF
    ;

// Performance optimization: a run of characters without special meaning
// (ASCII letters and code points U+00A0 through U+33FF) is grouped into a
// single token instead of one CHAR token per character.
WORD
    : ( 'A'..'Z' | 'a'..'z' | '\u00A0'..'\u33FF' )+
    ;