takentaal/takentaal.g4
Jos van den Oever c0b60b1c4f Add more characters that should be ommitted from STARTCHAR
and add an explanation for why this is the case.
2024-09-02 12:49:56 +02:00

127 lines
2.1 KiB
ANTLR

/**
* This file defines the grammar for takentaal.
* It is divided into parser rules (lowercase) and lexer rules (uppercase).
* The lexer splits the input into tokens according to the lexer rules.
* At any point, all lexer rules are considered. If multiple rules match,
* a lexer rule is chosen as follows:
* - the rule that matches the longest input is chosen
* - any implicit rule, e.g. 'a', is chosen
* - the first defined rule is chosen.
* Since this grammar has to match unquoted text, and a text is usually longer
* than any other token match, the TEXT rule disallows many characters as its
* first character.
*/
grammar takentaal;
// Entry rule: a complete takentaal document is the version header followed
// by exactly one plan.
// The explicit EOF forces the parser to consume the whole input. Without it
// the parser stops at the first token that cannot continue the plan and
// silently ignores everything after it, so malformed trailing content would
// be accepted without an error.
takentaal
    : header
      plan
      EOF
    ;
// Version line: the literal text 'takentaal v0.1.0' followed by an EOL token.
// The quoted literal is an implicit token introduced by this parser rule.
header : 'takentaal v0.1.0' EOL ;
// The plan: a heading line (PLAN_TOKEN, optional amount, title text), then
// its description lines, then one or more tasks.
// S is marked `-> skip` in the lexer, so `S*` never consumes a token here.
plan
    : PLAN_TOKEN S* amount TEXT EOL   // heading line
      description                     // zero or more text lines
      task+                           // at least one task is required
    ;
// Zero or more lines of free text, each terminated by EOL. May be empty.
description : ( TEXT EOL )* ;
// A task: a heading line (TASK_TOKEN, optional amount, title text), then its
// description lines, then zero or more subtasks.
// S is marked `-> skip` in the lexer, so `S*` never consumes a token here.
task
    : TASK_TOKEN S* amount TEXT EOL   // heading line
      description                     // zero or more text lines
      subtask*                        // subtasks are optional
    ;
// A subtask: a line starting with SUBTASK_TOKEN, an optional amount and a
// title text, followed by its description lines.
// S is marked `-> skip` in the lexer, so `S*` never consumes a token here.
subtask
    : SUBTASK_TOKEN S* amount TEXT EOL   // subtask line
      description                        // zero or more text lines
    ;
// Optional amount: an INT wrapped in START_AMOUNT / END_AMOUNT.
// The second alternative is empty, so the rule also matches nothing at all.
amount
    : START_AMOUNT S* INT END_AMOUNT   // amount present
    |                                  // amount omitted
    ;
// Heading markers. '##' is longer than '#', so by the longest-match rule
// input starting with "##" becomes one TASK_TOKEN, not two PLAN_TOKENs.
PLAN_TOKEN : '#' ;
TASK_TOKEN : '##' ;
// Marker that starts a subtask line: any one of the four marker characters
// defined by the SUBTASK_*_TOKEN rules.
SUBTASK_TOKEN
    : SUBTASK_NEW_TOKEN
    | SUBTASK_PARTIAL_TOKEN
    | SUBTASK_COMPLETE_TOKEN
    | SUBTASK_OBSOLETE_TOKEN
    ;
// The individual subtask marker characters referenced by SUBTASK_TOKEN.
// These rules are not fragments, but SUBTASK_TOKEN is defined earlier and
// matches the same single character, so on the tie the lexer emits
// SUBTASK_TOKEN rather than one of these token types.
SUBTASK_NEW_TOKEN      : '-' ;
SUBTASK_PARTIAL_TOKEN  : '/' ;
SUBTASK_COMPLETE_TOKEN : '*' ;
SUBTASK_OBSOLETE_TOKEN : '!' ;
// A single space between tokens. It is skipped, so the parser never sees it.
// Spaces inside a TEXT, or directly before a newline, are consumed by the
// longer TEXT / EOL matches instead.
S
: ' ' -> skip
;
// NOTE(review): as written, this character set contains the same single
// space that S matches. S is defined first, so it wins the tie and WS never
// produces a match. Confirm whether another whitespace character (e.g. a
// tab) was intended here.
WS
: [ ] -> skip
;
// End of line: one or more newlines, each optionally preceded by spaces.
// Grouping the spaces with every newline lets blank lines that contain only
// spaces be folded into the same token. With ' '* '\n'+ the input "\n  \n"
// was split into two EOL tokens, which no parser rule accepts.
// Only '\n' is recognised as a line terminator.
EOL
    : ( ' '* '\n' )+
    ;
// Unsigned integer: one or more decimal digits.
INT : DIGIT+ ;

// A single decimal digit, '0' through '9'.
fragment DIGIT : '0'..'9' ;
// Braces that delimit an amount (see the `amount` parser rule).
START_AMOUNT : '{' ;
END_AMOUNT   : '}' ;
// First character of a TEXT. Starting from the printable range used by CHAR,
// the following characters are left out so that the start of a TEXT cannot
// be confused with another token:
//   ' '                  (skipped whitespace)
//   '0'..'9'             (INT)
//   '!' '#' '-' '/' '*'  (plan, task and subtask markers)
//   '{' '}'              (amount delimiters)
// Reading the set left to right: '"', '$'..')', '+'..',', '.', ':'..'z',
// '|', '~', then U+00A0..U+33FF.
fragment STARTCHAR
: ["$-)+-,.:-z|~\u00A0-\u33FF]
;
// A text should not end with a space, so ENDCHAR is the CHAR range minus ' '.
// The ASCII part therefore starts at '!', the character right after space.
// Starting at '"' also excluded '!', which made it impossible for a text
// to end with an exclamation mark.
fragment ENDCHAR
    : [!-~\u00A0-\u33FF]
    ;
// Any character allowed inside a TEXT: printable ASCII from ' ' to '~', plus
// the code points U+00A0..U+33FF. Code points above U+33FF are not accepted.
fragment CHAR
: [ -~\u00A0-\u33FF] // ASCII and UNICODE
;
// Free text: one STARTCHAR, optionally followed by any number of CHARs and a
// closing ENDCHAR. A single-character text is therefore valid, and a longer
// text must end with an ENDCHAR.
// A text cannot start with a special character (see STARTCHAR).
// NOTE(review): the earlier comment said such a text "has to be placed in
// quotes", but no quoted-text rule is visible in this grammar. Confirm
// whether one is planned.
TEXT
: STARTCHAR (CHAR* ENDCHAR)?
;