mirror of
https://codeberg.org/NLnet/takentaal.git
synced 2025-08-29 22:10:07 +00:00
121 lines
1.8 KiB
ANTLR
121 lines
1.8 KiB
ANTLR
/**
 * This file defines the grammar for takentaal.
 * It is divided into parser rules (lowercase) and lexer rules (uppercase).
 * The lexer splits an input into tokens according to the lexer rules.
 * At any point, all lexer rules are considered. If multiple rules match,
 * a lexer rule is chosen as follows:
 * - the rule that matches the longest input is chosen
 * - any implicit rule, e.g. 'a', is chosen
 * - the first defined rule is chosen.
 * Since this grammar has to match unquoted texts and texts are usually longer
 * than other token matches, texts are split into characters so that they have
 * a lower ranking.
 */
|
|
|
|
grammar takentaal;
|
|
|
|
// A takentaal document: the version header followed by a single plan.
takentaal : header plan ;
|
|
|
|
// Fixed first line of a document. The literal becomes an implicit token, so
// the whole string (including the version number) must match exactly.
header : 'takentaal v0.1.0' ;
|
|
|
|
/*
 * Free-form text: one or more spaces, single characters, and words.
 *
 * On equal-length matches the lexer picks the rule defined first, so a run of
 * digits is tokenized as INT and a lone '-', '/', '*' or '!' as one of the
 * SUBTASK_*_TOKEN rules instead of CHAR. Those tokens are accepted here too,
 * otherwise ordinary text such as "front-end" or "phase 2" cannot be parsed.
 * '{' and '}' (START_AMOUNT / END_AMOUNT) stay reserved for amounts.
 */
text
    : ( S
      | CHAR
      | WORD
      | INT
      | SUBTASK_NEW_TOKEN
      | SUBTASK_PARTIAL_TOKEN
      | SUBTASK_COMPLETE_TOKEN
      | SUBTASK_OBSOLETE_TOKEN
      )+
    ;
|
|
|
|
// A plan: the plan marker, optional spaces, an amount and a title text,
// then an optional multi-line description and at least one task.
plan : PLAN_TOKEN S* amount text description task+ ;
|
|
|
|
// Zero or more description lines, each introduced by an end-of-line token.
// May be empty.
description : (EOL text)* ;
|
|
|
|
// A task: the task marker, optional spaces, an amount and a title text,
// then an optional description and any number of subtasks.
task : TASK_TOKEN S* amount text description subtask* ;
|
|
|
|
// A subtask: the subtask marker, optional spaces, an amount and a title text,
// followed by an optional description.
subtask : SUBTASK_TOKEN S* amount text description ;
|
|
|
|
// An integer amount in braces, e.g. {100}. Spaces are allowed after the
// opening brace but not between the number and the closing brace.
amount : START_AMOUNT S* INT END_AMOUNT ;
|
|
|
|
// Plan marker: one or more line ends followed by a single '#'.
PLAN_TOKEN : EOL+ '#' ;
|
|
|
|
// Task marker: one or more line ends followed by '##'. Being one character
// longer than PLAN_TOKEN, it wins by the longest-match rule.
TASK_TOKEN : EOL+ '##' ;
|
|
|
|
// Subtask marker: one or more line ends followed by one of the four
// single-character status markers.
SUBTASK_TOKEN
    : EOL+
      ( SUBTASK_NEW_TOKEN
      | SUBTASK_PARTIAL_TOKEN
      | SUBTASK_COMPLETE_TOKEN
      | SUBTASK_OBSOLETE_TOKEN
      )
    ;
|
|
|
|
// Status marker '-' (named "new").
SUBTASK_NEW_TOKEN : '-' ;
|
|
|
|
// Status marker '/' (named "partial").
SUBTASK_PARTIAL_TOKEN : '/' ;
|
|
|
|
// Status marker '*' (named "complete").
SUBTASK_COMPLETE_TOKEN : '*' ;
|
|
|
|
// Status marker '!' (named "obsolete").
SUBTASK_OBSOLETE_TOKEN : '!' ;
|
|
|
|
// A single space character, kept as a token so that text can contain spaces.
S : ' ' ;
|
|
|
|
|
|
// Whitespace that is dropped from the token stream.
// NOTE(review): as written this class contains only a space, which S (defined
// earlier, same match length) already claims, so this rule looks unreachable.
// Possibly a tab was intended here - confirm against the upstream file.
WS : [ ] -> skip ;
|
|
|
|
// End of line: optional trailing spaces followed by one or more line breaks.
// Each line break may be "\n" or "\r\n", so files with Windows line endings
// tokenize as well ('\r' is not matched by any other lexer rule).
EOL
    : ' '* ('\r'? '\n')+
    ;
|
|
|
|
// Unsigned integer: one or more decimal digits.
INT : DIGIT+ ;
|
|
|
|
// A single decimal digit; a fragment, so it never produces a token by itself.
fragment DIGIT : [0-9] ;
|
|
|
|
// Opening brace of an amount.
START_AMOUNT : '{' ;
|
|
|
|
// Closing brace of an amount.
END_AMOUNT : '}' ;
|
|
|
|
// One printable character other than space (space is handled by S):
// ASCII '!' through '~', plus Unicode U+00A0 through U+33FF.
// Defined near the end, so every earlier single-character rule wins a tie.
CHAR : [!-~\u00A0-\u33FF] ;
|
|
|
|
// Performance optimisation: a run of letters (and U+00A0 through U+33FF) that
// carry no special meaning is grouped into one token instead of many CHARs.
WORD : [A-Za-z\u00A0-\u33FF]+ ;
|