takentaal/takentaal.g4
Jos van den Oever 9b1da89cdf Require a space after an amount
Otherwise the space becomes part of the description.
2024-10-01 13:28:41 +02:00

113 lines
2.4 KiB
ANTLR

/**
* This file defines the grammar for takentaal.
* It is divided into parser rules (lowercase names) and lexer rules (uppercase
* names).
* The parser splits an input into tokens according to the lexer rules.
* At any point in the input text, all lexer rules are considered. If multiple
* rules match, a lexer rule is chosen as follows:
* - the rule that matches the longest input is chosen
* - any implicit rule, e.g. 'a', is chosen
* - the first defined rule is chosen.
* Since this grammar has to match unquoted texts and texts are usually longer
* than other token matches, texts are split into characters so that they have
* a lower ranking.
*/
grammar takentaal;
// Entry point: the input is either a v1.0 document or a v1.0 amendment and
// must be consumed completely (EOF).
takentaal
    : ( takentaal_v1_0
      | amendment_v1_0
      )
      EOF
    ;
// A v1.0 document: the version header, one plan, then optional trailing
// line ends.
takentaal_v1_0
    : 'takentaal v1.0'
      t1_0_plan
      EOL*
    ;

// The plan: a '# ' heading with an optional amount and a title text,
// followed by description lines and at least one task.
t1_0_plan
    : PLAN_TOKEN S* amount text
      description
      t1_0_task+
    ;

// A task: a '## ' heading with an optional amount and a title text,
// followed by description lines and any number of subtasks.
t1_0_task
    : TASK_TOKEN S* amount text
      description
      t1_0_subtask*
    ;

// A subtask: in a v1.0 document only the '- ' marker is accepted. It is
// followed by an optional amount, a title text and description lines.
t1_0_subtask
    : SUBTASK_NEW_TOKEN S* amount text
      description
    ;
// A v1.0 amendment: the amendment header, one plan, then optional trailing
// line ends.
amendment_v1_0
    : 'takentaal-amendment v1.0'
      a1_0_plan
      EOL*
    ;

// The amendment plan: a '# ' heading with an optional amount and a title
// text, followed by description lines and at least one task.
a1_0_plan
    : PLAN_TOKEN S* amount text
      description
      a1_0_task+
    ;

// An amendment task: a '## ' heading with an optional amount and a title
// text, followed by description lines and any number of subtasks.
a1_0_task
    : TASK_TOKEN S* amount text
      description
      a1_0_subtask*
    ;

// An amendment subtask: one of the subtask markers, an optional amount,
// a title text and description lines.
a1_0_subtask
    : a1_0_subtask_token S* amount text
      description
    ;

// The subtask markers accepted in an amendment ('- ', '/ ', '* ', '! ').
a1_0_subtask_token
    : SUBTASK_NEW_TOKEN
    | SUBTASK_PARTIAL_TOKEN
    | SUBTASK_COMPLETE_TOKEN
    | SUBTASK_OBSOLETE_TOKEN
    ;
// A text is a non-empty run of tokens on one line.
// Any implicit and explicit lexer token that may appear in a text must be
// listed in this definition; a token that is missing here causes a syntax
// error as soon as it shows up inside a title or description.
// This includes the implicit tokens introduced by other parser rules:
// - '} ' (from the amount rule): the lexer prefers the longest match, so a
//   closing bracket followed by a space is always lexed as '} ', never as
//   '}' followed by S.
// - the two version headers: they are longer than any WORD match, so they
//   are lexed as header tokens wherever they occur.
// When a line starts with a bracketed amount, both the amount rule and this
// rule can match it; the amount alternative is listed first in its rule and
// therefore keeps priority.
text
    : ( INT
      | '{'
      | '}'
      | '} '
      | 'takentaal v1.0'
      | 'takentaal-amendment v1.0'
      | S
      | CHAR
      | WORD
      )+
    ;
// Zero or more description lines, each consisting of a line end followed by
// a text.
description
    : ( EOL text )*
    ;
// An amount in curly brackets, or nothing.
// The closing token is '} ' (bracket plus one space): the space after an
// amount is required so that it does not become part of the following text.
amount
    : '{' S* INT S* '} '
    | // no amount given
    ;
// Lexer rules

// Line-start markers.
// These lexer tokens are combined with EOL so that they only match at the
// start of a line. Each marker includes the single space that follows it.
PLAN_TOKEN             : EOL+ '# '  ;
TASK_TOKEN             : EOL+ '## ' ;
SUBTASK_NEW_TOKEN      : EOL+ '- '  ;
SUBTASK_PARTIAL_TOKEN  : EOL+ '/ '  ;
SUBTASK_COMPLETE_TOKEN : EOL+ '* '  ;
SUBTASK_OBSOLETE_TOKEN : EOL+ '! '  ;
// A single space. Spaces are tokens of their own so that parser rules can
// refer to them explicitly.
S : ' ' ;
// No implicit whitespace handling
// NOTE(review): S is defined before this rule and matches the same single
// space, so WS looks like it can never be chosen - confirm before relying on
// it or removing it.
WS : [ ] -> skip ;
// A line end: optional trailing spaces followed by one or more newlines.
EOL : ' '* '\n'+ ;
// An unsigned integer: one or more decimal digits.
INT : DIGIT+ ;
// Helper for INT; a fragment never produces a token by itself.
fragment DIGIT : [0-9] ;
// Match printable characters, except space which is covered by S
// CHAR matches exactly one character. It is defined after INT and before
// WORD, so for a one-character match a digit becomes INT and a letter
// becomes CHAR.
CHAR : [!-~\u00A0-\u33FF] ; // ASCII and UNICODE
// This is a performance improvement that groups chars that do not have a
// special meaning
// A run of two or more such characters is longer than a CHAR match and is
// therefore lexed as a single WORD.
WORD : [A-Za-z\u00A0-\u33FF]+ ;