takentaal/takentaal.g4

/**
 * This file defines the grammar for takentaal.
 * It is divided into parser rules (lowercase) and lexer rules (uppercase).
 * The parser splits an input into tokens accoring to the lexer rules.
 * At any point, all lexer rules are considered. If multiple rules match,
 * a lexer rule is chosen as follows:
 *  - the rule that matches the longest input is chosen
 *  - any implicit rule, e.g. 'a', is chosen
 *  - the first defined rule is chosen.
 * Since this grammar has to match unquoted texts and text are usually longer
 * than other token matches, the TEXT rule disallows many characters as the
 * first character to start with.
 */

grammar takentaal;

// Entry rule: a document is the version header followed by one plan.
// The explicit EOF forces the parser to consume the whole input. Without it
// the parser stops silently at the first token that cannot continue the
// plan (for example a stray EOL between two tasks) and the remainder of the
// document is dropped without any syntax error being reported.
takentaal
    : header
      plan
      EOF
    ;

// Version line that must open every document.
// The quoted literal is an implicit token; when it and TEXT match the same
// number of characters the implicit token wins, so this exact line is never
// lexed as TEXT.
header
    : 'takentaal v0.1.0' EOL
    ;

// A plan: the '#' marker, an optional amount and a title on one line,
// followed by free-text description lines and at least one task.
// NOTE(review): S is declared with `-> skip`, so no S token ever reaches the
// parser and `S*` always matches zero tokens here.
plan
    : PLAN_TOKEN S* amount TEXT EOL
      description
      task+
    ;

// Zero or more free-text lines, each a TEXT token terminated by EOL.
// The rule may match nothing, so a description is always optional.
description
    : (TEXT EOL)*
    ;

// A task: the '##' marker, an optional amount and a title on one line,
// followed by free-text description lines and any number of subtasks.
// NOTE(review): S is declared with `-> skip`, so no S token ever reaches the
// parser and `S*` always matches zero tokens here.
task
    : TASK_TOKEN S* amount TEXT EOL
      description
      subtask*
    ;

// A subtask: one of the subtask markers ('-', '/', '*' or '!'), an optional
// amount and a title on one line, followed by free-text description lines.
// NOTE(review): S is declared with `-> skip`, so no S token ever reaches the
// parser and `S*` always matches zero tokens here.
subtask
    : SUBTASK_TOKEN S* amount TEXT EOL
      description
    ;

// Optional amount written between braces, e.g. "{100}".
// When the next token is not START_AMOUNT the rule matches nothing.
amount
    : ( START_AMOUNT S* INT END_AMOUNT )?
    ;

// Marker that opens the plan line.
PLAN_TOKEN
    : '#'
    ;

// Marker that opens a task line. For the input "##" this rule matches two
// characters and PLAN_TOKEN only one, so the longest-match rule picks
// TASK_TOKEN.
TASK_TOKEN
    : '##'
    ;

// Marker that opens a subtask line; any of the four marker characters below.
// NOTE(review): this rule is defined before the four rules it refers to and
// matches exactly the same single character, so on the tie the lexer emits
// SUBTASK_TOKEN and the four specific token types are never produced.
SUBTASK_TOKEN
    : SUBTASK_NEW_TOKEN
    | SUBTASK_PARTIAL_TOKEN
    | SUBTASK_COMPLETE_TOKEN
    | SUBTASK_OBSOLETE_TOKEN
    ;

// Marker character named "new".
SUBTASK_NEW_TOKEN : '-' ;

// Marker character named "partial".
SUBTASK_PARTIAL_TOKEN : '/' ;

// Marker character named "complete".
SUBTASK_COMPLETE_TOKEN : '*' ;

// Marker character named "obsolete".
SUBTASK_OBSOLETE_TOKEN : '!' ;

// A single space between tokens. The `skip` command discards the match, so
// the parser never receives an S token.
S
    : ' ' -> skip
    ;


// NOTE(review): matches exactly the same single space as S. Because S is
// defined first it wins the tie, so this rule appears to be unreachable.
WS
    : [ ] -> skip
    ;

// End of line: one or more line breaks, each optionally preceded by spaces.
//  - '\r'? accepts Windows (CRLF) line endings; a bare '\n' still matches.
//  - Putting ' '* inside the repetition folds blank lines that contain only
//    spaces into the same EOL token. Otherwise such a line becomes a second,
//    separate EOL that no parser rule accepts.
// Spaces that are not followed by a line break (indentation of the next
// line) are not consumed: the lexer falls back to the last complete match.
EOL
    : (' '* '\r'? '\n')+
    ;

// Unsigned decimal number: at least one digit.
INT
    : DIGIT DIGIT*
    ;

// One ASCII decimal digit; a fragment, so it never becomes a token itself.
fragment DIGIT
    : '0'..'9'
    ;

// Opening brace of an amount, as used by the `amount` parser rule.
START_AMOUNT
    : '{'
    ;

// Closing brace of an amount, as used by the `amount` parser rule.
END_AMOUNT
    : '}'
    ;

// First character of a text: the printable range minus every character that
// can start another token. Excluded from the ASCII part are the space, the
// digits '0'-'9' and the special characters '!' '#' '*' '-' '/' '{' '}'.
fragment STARTCHAR
    : ["$-)+-,.:-z|~\u00A0-\u33FF]
    ;

// A text should not end with a space, so ENDCHAR is CHAR without the space.
// The ASCII range therefore starts at '!' (the character right after the
// space). Starting it at '"' would also exclude '!', and a text such as
// "Done!" would then be split into TEXT "Done" plus a stray '!' token.
fragment ENDCHAR
    : [!-~\u00A0-\u33FF]
    ;

// Any character allowed inside a text: printable ASCII (space through '~')
// or a Unicode code point in the range U+00A0..U+33FF.
fragment CHAR
    : ' '..'~'
    | '\u00A0'..'\u33FF'
    ;

// Unquoted text. It is either a single STARTCHAR, or a STARTCHAR followed by
// any run of CHARs and closed by an ENDCHAR. A text cannot start with a
// special character unless it is placed in quotes ('"' is a valid STARTCHAR).
TEXT
    : STARTCHAR
    | STARTCHAR CHAR* ENDCHAR
    ;