Add a grammar for an amendment

and remove the amendment-specific token from the plan grammar
This commit is contained in:
Jos van den Oever 2024-09-04 16:38:13 +02:00
parent f1c236c751
commit 304f90a0b5
4 changed files with 95 additions and 80 deletions

View file

@ -14,14 +14,59 @@
grammar takentaal;
takentaal
: header
plan
// Entry rule: the input is either a v1.0 plan document or a v1.0 amendment
// document, and it must be consumed completely (EOF).
takentaal : (takentaal_v1_0 | amendment_v1_0) EOF ;
// A plan document: the literal version header 'takentaal v1.0', the plan
// itself, then any number of trailing EOL tokens.
takentaal_v1_0
: 'takentaal v1.0'
t1_0_plan
EOL*
;
header
: 'takentaal v0.1.0'
// Plan of a plan document: PLAN_TOKEN, optional spaces, an amount (which may
// be empty), a title text, a description, and at least one task.
t1_0_plan
: PLAN_TOKEN S* amount text
description
t1_0_task+
;
// Task of a plan document: TASK_TOKEN, optional spaces, an amount (which may
// be empty), a title text, a description, and zero or more subtasks.
t1_0_task
: TASK_TOKEN S* amount text
description
t1_0_subtask*
;
// Subtask of a plan document. Only SUBTASK_NEW_TOKEN is accepted as the
// marker here; the other subtask markers are not part of this rule.
t1_0_subtask
: SUBTASK_NEW_TOKEN S* amount text
description
;
// An amendment document: the literal version header
// 'takentaal-amendment v1.0', the amended plan, then any number of trailing
// EOL tokens.
amendment_v1_0
: 'takentaal-amendment v1.0'
a1_0_plan
EOL*
;
// Plan of an amendment document: PLAN_TOKEN, optional spaces, an amount
// (which may be empty), a title text, a description, and at least one task.
a1_0_plan
: PLAN_TOKEN S* amount text
description
a1_0_task+
;
// Task of an amendment document: TASK_TOKEN, optional spaces, an amount
// (which may be empty), a title text, a description, and zero or more
// amendment subtasks.
a1_0_task
: TASK_TOKEN S* amount text
description
a1_0_subtask*
;
// Subtask of an amendment document: one of the markers accepted by
// a1_0_subtask_token, optional spaces, an amount (which may be empty), a
// title text, and a description.
a1_0_subtask
: a1_0_subtask_token S* amount text
description
;
// The subtask markers allowed in an amendment: any one of the NEW, PARTIAL,
// COMPLETE or OBSOLETE subtask tokens.
a1_0_subtask_token
: SUBTASK_NEW_TOKEN
| SUBTASK_PARTIAL_TOKEN
| SUBTASK_COMPLETE_TOKEN
| SUBTASK_OBSOLETE_TOKEN
;
// Any implicit or explicit lexer token that may appear in a text should be listed
@ -30,87 +75,37 @@ text
: (INT | '{' | '}' | S | CHAR | WORD)+
;
// Plan: PLAN_TOKEN, optional spaces, an amount (which may be empty), a title
// text, a description, and at least one task.
plan
: PLAN_TOKEN S* amount text
description
task+
;
// Zero or more continuation lines, each an EOL followed by text.
description : (EOL text)* ;
// Zero or more continuation lines, each an EOL followed by text.
description
: (EOL text)*
;
// An amount in curly brackets (an INT, optionally padded with spaces), or
// nothing: the second alternative is intentionally empty.
amount : '{' S* INT S* '}' | ;
// Task: TASK_TOKEN, optional spaces, an amount (which may be empty), a title
// text, a description, and zero or more subtasks.
task
: TASK_TOKEN S* amount text
description
subtask*
;
// Lexer rules
// Subtask: SUBTASK_TOKEN, optional spaces, an amount (which may be empty), a
// title text, and a description.
subtask
: SUBTASK_TOKEN S* amount text
description
;
// Plan marker: one or more EOL matches followed by '#'. Including EOL in the
// token means the marker only matches at the start of a line.
PLAN_TOKEN : EOL+ '#' ;
// An amount in curly brackets (an INT, optionally padded with spaces), or
// nothing: the second alternative is intentionally empty.
amount
: '{' S* INT S* '}'
|
;
// Task marker: one or more EOL matches followed by '##'. Including EOL in the
// token means the marker only matches at the start of a line.
TASK_TOKEN : EOL+ '##' ;
// Plan marker: one or more EOL matches followed by '#'.
PLAN_TOKEN
: EOL+ '#'
;
// These lexer tokens are combined with EOL so that they only match at the
// start of a line. Each is one or more EOL matches followed by a single
// marker character: '-', '/', '*' or '!'.
SUBTASK_NEW_TOKEN : EOL+ '-' ;
SUBTASK_PARTIAL_TOKEN : EOL+ '/' ;
SUBTASK_COMPLETE_TOKEN : EOL+ '*' ;
SUBTASK_OBSOLETE_TOKEN : EOL+ '!' ;
// Task marker: one or more EOL matches followed by '##'.
TASK_TOKEN
: EOL+ '##'
;
// A single space character, emitted as an explicit token so parser rules can
// reference it.
S : ' ' ;
// Combined subtask marker: any one of the NEW, PARTIAL, COMPLETE or OBSOLETE
// subtask tokens.
SUBTASK_TOKEN
: (SUBTASK_NEW_TOKEN | SUBTASK_PARTIAL_TOKEN | SUBTASK_COMPLETE_TOKEN | SUBTASK_OBSOLETE_TOKEN)
;
// No implicit whitespace handling
// NOTE(review): this rule matches a single space and skips it, but S matches
// the same single space; if S is declared earlier in the grammar it would win
// the tie and this rule would never fire -- confirm before relying on it.
WS : [ ] -> skip ;
// Subtask marker: one or more EOL matches followed by '-'.
SUBTASK_NEW_TOKEN
: EOL+ '-'
;
// End of line: any number of spaces followed by one or more newline
// characters.
EOL : ' '* '\n'+ ;
// Subtask marker: one or more EOL matches followed by '/'.
SUBTASK_PARTIAL_TOKEN
: EOL+ '/'
;
// An unsigned integer: one or more decimal digits.
INT : DIGIT+ ;
// Subtask marker: one or more EOL matches followed by '*'.
SUBTASK_COMPLETE_TOKEN
: EOL+ '*'
;
// Subtask marker: one or more EOL matches followed by '!'.
SUBTASK_OBSOLETE_TOKEN
: EOL+ '!'
;
// A single space character, emitted as an explicit token.
S
: ' '
;
// Matches a single space and skips it.
// NOTE(review): S above matches the same input and is declared first, so
// this rule looks unreachable -- confirm.
WS
: [ ] -> skip
;
// End of line: any number of spaces followed by one or more newline
// characters.
EOL
: ' '* '\n'+
;
// An unsigned integer: one or more decimal digits.
INT
: DIGIT+
;
// Helper fragment for a single decimal digit; fragments do not produce
// tokens of their own.
fragment DIGIT
: [0-9]
;
// Helper fragment for a single decimal digit; fragments do not produce
// tokens of their own.
fragment DIGIT : [0-9] ;
// Match a single printable character, except space which is covered by S:
// '!' through '~', plus U+00A0 through U+33FF.
CHAR
: [!-~\u00A0-\u33FF] // ASCII and UNICODE
;
// A single printable character other than space: '!' through '~', plus
// U+00A0 through U+33FF.
CHAR : [!-~\u00A0-\u33FF] ; // ASCII and UNICODE
// This is a performance improvement that groups chars that do not have a
// special meaning: a run of one or more ASCII letters or characters in
// U+00A0 through U+33FF becomes a single token.
WORD
: [A-Za-z\u00A0-\u33FF]+
;
// A run of one or more ASCII letters or characters in U+00A0 through U+33FF,
// matched as a single token.
WORD : [A-Za-z\u00A0-\u33FF]+ ;