mirror of
https://codeberg.org/NLnet/takentaal.git
synced 2025-08-29 14:00:06 +00:00
113 lines
2.4 KiB
ANTLR
113 lines
2.4 KiB
ANTLR
/**
|
|
* This file defines the grammar for takentaal.
|
|
* It is divided into parser rules (lowercase names) and lexer rules (uppercase
|
|
* names).
|
|
* The parser splits an input into tokens according to the lexer rules.
|
|
* At any point in the input text, all lexer rules are considered. If multiple
|
|
* rules match, a lexer rule is chosen as follows:
|
|
* - the rule that matches the longest input is chosen
|
|
* - any implicit rule, e.g. 'a', is chosen
|
|
* - the first defined rule is chosen.
|
|
* Since this grammar has to match unquoted texts and texts are usually longer
|
|
* than other token matches, texts are split into characters so that they have
|
|
* a lower ranking.
|
|
*/
|
|
|
|
grammar takentaal;
|
|
|
|
// Entry rule: the whole input must be either a v1.0 plan document or a v1.0
// amendment document, followed by end of input.
takentaal
    : ( takentaal_v1_0
      | amendment_v1_0
      )
      EOF
    ;
|
|
|
|
// A v1.0 plan document: the literal version header, exactly one plan, and any
// number of trailing EOL tokens. The header must be followed by a newline,
// because PLAN_TOKEN (the start of t1_0_plan) itself begins with EOL.
takentaal_v1_0
    : 'takentaal v1.0' t1_0_plan EOL*
    ;
|
|
|
|
// The plan: a '# ' heading line with an optional amount and a title text,
// then zero or more description lines, then at least one task.
t1_0_plan
    : PLAN_TOKEN S* amount text     // heading line
      description                   // free-text lines following the heading
      t1_0_task+                    // one or more tasks
    ;
|
|
|
|
// A task: a '## ' heading line with an optional amount and a title text,
// then zero or more description lines, then any number of subtasks.
t1_0_task
    : TASK_TOKEN S* amount text     // heading line
      description                   // free-text lines following the heading
      t1_0_subtask*                 // zero or more subtasks
    ;
|
|
|
|
// A subtask in a plan document: only the '- ' marker (SUBTASK_NEW_TOKEN) is
// accepted here, followed by an optional amount, a title text and zero or
// more description lines.
t1_0_subtask
    : SUBTASK_NEW_TOKEN S* amount text
      description
    ;
|
|
|
|
// A v1.0 amendment document: the literal amendment header, exactly one plan,
// and any number of trailing EOL tokens. The header must be followed by a
// newline, because PLAN_TOKEN (the start of a1_0_plan) itself begins with EOL.
amendment_v1_0
    : 'takentaal-amendment v1.0' a1_0_plan EOL*
    ;
|
|
|
|
// The plan inside an amendment: same shape as t1_0_plan, but its tasks are
// a1_0_task so that their subtasks may carry any of the subtask markers.
a1_0_plan
    : PLAN_TOKEN S* amount text     // heading line
      description                   // free-text lines following the heading
      a1_0_task+                    // one or more tasks
    ;
|
|
|
|
// A task inside an amendment: a '## ' heading line with an optional amount and
// a title text, zero or more description lines, then any number of amendment
// subtasks.
a1_0_task
    : TASK_TOKEN S* amount text     // heading line
      description                   // free-text lines following the heading
      a1_0_subtask*                 // zero or more subtasks
    ;
|
|
|
|
// A subtask inside an amendment: one of the markers accepted by
// a1_0_subtask_token, followed by an optional amount, a title text and zero or
// more description lines.
a1_0_subtask
    : a1_0_subtask_token S* amount text
      description
    ;
|
|
|
|
// The line markers that may start a subtask in an amendment. Each token
// includes the preceding line break(s); see the lexer rules of the same name.
a1_0_subtask_token
    : SUBTASK_NEW_TOKEN         // line starting with '- '
    | SUBTASK_PARTIAL_TOKEN     // line starting with '/ '
    | SUBTASK_COMPLETE_TOKEN    // line starting with '* '
    | SUBTASK_OBSOLETE_TOKEN    // line starting with '! '
    ;
|
|
|
|
// Any implicit and explicit lexer token that may appear in a text should be
|
|
// listed in this definition.
|
|
// A non-empty run of tokens on a single line (EOL is deliberately absent).
//
// Text is matched token by token, so every lexer token that can occur inside a
// line has to be an alternative here. That includes the implicit literal
// tokens created by other rules: '} ' (from amount) and the two version
// headers. Because the lexer always takes the longest match, "} " inside a
// sentence is lexed as the single token '} ' (not '}' followed by S), and a
// sentence mentioning "takentaal v1.0" yields the header token; without the
// alternatives below such lines would be rejected as syntax errors.
//
// Where a leading '{' INT '} ' could now be read either as an amount or as the
// start of a text, ANTLR resolves the ambiguity in favour of the
// lowest-numbered alternative, so the amount reading still wins.
text
    : ( INT
      | '{'
      | '}'
      | '} '
      | 'takentaal v1.0'
      | 'takentaal-amendment v1.0'
      | S
      | CHAR
      | WORD
      )+
    ;
|
|
|
|
// Zero or more further lines of text, each introduced by an EOL token. A line
// that begins with one of the heading/subtask markers is lexed as that marker
// token (which is longer than EOL alone) and therefore ends the description.
description
    : ( EOL text )*
    ;
|
|
|
|
// An amount in curly brackets or nothing
|
|
// Optional amount: an integer in curly brackets, with optional spaces inside
// the brackets. The closing literal is '} ' (bracket plus one space), so the
// bracket must be followed directly by a space. The rule may match nothing.
amount
    : ( '{' S* INT S* '} ' )?
    ;
|
|
|
|
// Lexer rules
|
|
|
|
// Plan heading marker: one or more EOL matches followed by '# '. Including the
// line break in the token means '# ' is only recognised at the start of a line.
PLAN_TOKEN
    : EOL+ '# '
    ;
|
|
|
|
// Task heading marker: one or more EOL matches followed by '## '. Including the
// line break in the token means '## ' is only recognised at the start of a line.
TASK_TOKEN
    : EOL+ '## '
    ;
|
|
|
|
// These lexer tokens are combined with EOL so that they only match at the
|
|
// start of a line.
|
|
// Subtask marker '- ' at the start of a line (the line break is part of the
// token). This is the only subtask marker accepted in a plan document.
SUBTASK_NEW_TOKEN
    : EOL+ '- '
    ;
|
|
// Subtask marker '/ ' at the start of a line (the line break is part of the
// token). Only referenced from a1_0_subtask_token, i.e. in amendments.
SUBTASK_PARTIAL_TOKEN
    : EOL+ '/ '
    ;
|
|
// Subtask marker '* ' at the start of a line (the line break is part of the
// token). Only referenced from a1_0_subtask_token, i.e. in amendments.
SUBTASK_COMPLETE_TOKEN
    : EOL+ '* '
    ;
|
|
// Subtask marker '! ' at the start of a line (the line break is part of the
// token). Only referenced from a1_0_subtask_token, i.e. in amendments.
SUBTASK_OBSOLETE_TOKEN
    : EOL+ '! '
    ;
|
|
|
|
// Exactly one space character. Spaces are real tokens in this grammar: the
// parser rules consume them explicitly (S*) or as part of text.
S
    : ' '
    ;
|
|
|
|
// No implicit whitespace handling
|
|
// NOTE(review): as written, the character set contains only a space, which is
// exactly what S matches. S is defined earlier, and for matches of equal
// length the first defined lexer rule wins, so this rule appears never to be
// selected and no space is actually skipped. Confirm whether another
// whitespace character (e.g. a tab or '\r') was intended here.
WS : [ ] -> skip ;
|
|
|
|
// End of line: any trailing spaces followed by one or more '\n' characters.
// The marker tokens (PLAN_TOKEN, TASK_TOKEN, SUBTASK_*_TOKEN) start with this
// rule; being longer, they take precedence over a bare EOL when a marker
// follows the line break.
EOL
    : ' '* '\n'+
    ;
|
|
|
|
// An unsigned run of decimal digits. Defined before CHAR, so a single digit is
// lexed as INT rather than CHAR.
INT
    : DIGIT+
    ;
|
|
|
|
// Helper for INT: one ASCII decimal digit. As a fragment it never produces a
// token of its own.
fragment DIGIT
    : [0-9]
    ;
|
|
|
|
// Match printable characters, except space which is covered by S
|
|
// A single printable character: ASCII '!' through '~', or a code point in the
// range U+00A0 through U+33FF. Code points above U+33FF are not covered.
// Characters that also match an earlier rule or an implicit literal of the
// same length (digits via INT, '{' and '}') are lexed as that token instead.
CHAR
    : [!-~\u00A0-\u33FF]
    ;
|
|
|
|
// This is a performance improvement that groups chars that do not have a
|
|
// special meaning
|
|
// A run of ASCII letters and/or code points U+00A0 through U+33FF, lexed as
// one token instead of one CHAR per character. A run of length one ties with
// CHAR, which is defined first and therefore wins; longer runs become WORD.
WORD
    : [A-Za-z\u00A0-\u33FF]+
    ;
|