Add a grammar for an amendment

and remove the amendment-specific token from the plan grammar
This commit is contained in:
Jos van den Oever 2024-09-04 16:38:13 +02:00
parent f1c236c751
commit 304f90a0b5
4 changed files with 95 additions and 80 deletions

View file

@ -6,11 +6,11 @@
nix develop
# Inspect lexer
antlr4-parse takentaal.g4 takentaal example -tokens
antlr4-parse takentaal.g4 takentaal plan_example -tokens
# Inspect parser
antlr4-parse takentaal.g4 takentaal example -tree
antlr4-parse takentaal.g4 takentaal example -gui
antlr4-parse takentaal.g4 takentaal plan_example -tree
antlr4-parse takentaal.g4 takentaal plan_example -gui
# Generate TypeScript parser
antlr4 -Dlanguage=TypeScript -o takentaal takentaal.g4

View file

@ -1,4 +1,4 @@
takentaal v0.1.0
takentaal-amendment v1.0
# {5000} Full work plan
This is the (draft) description of the entire work plan.

20
plan_example Normal file
View file

@ -0,0 +1,20 @@
takentaal v1.0
# {5000} Full work plan
This is the (draft) description of the entire work plan.
## {1000} First task
This is the description of the first task.
This description has # ' " [] symbols and ü © Ð Latin-1 chars.
- {500} First subtask
- {500} Second subtask
## {1000} Second task
This is the description of the {} second task for project 10.
- {500} First subtask
- {500} 2nd subtask

View file

@ -14,14 +14,59 @@
grammar takentaal;
takentaal
: header
plan
// Entry rule: the input is exactly one v1.0 plan or one v1.0 amendment,
// and it must be consumed completely (EOF).
takentaal : (takentaal_v1_0 | amendment_v1_0) EOF ;
// A plan document: the literal version line, one plan, then any number of
// trailing EOL tokens.
// No EOL is listed between the version line and the plan because PLAN_TOKEN
// itself begins with EOL+.
takentaal_v1_0
: 'takentaal v1.0'
t1_0_plan
EOL*
;
header
: 'takentaal v0.1.0'
// The plan of a v1.0 plan document: a PLAN_TOKEN heading line (optional
// spaces, optional amount, title text), a possibly empty description, and
// one or more tasks.
t1_0_plan
: PLAN_TOKEN S* amount text
description
t1_0_task+
;
// A task in a v1.0 plan document: a TASK_TOKEN heading line (optional
// spaces, optional amount, title text), a possibly empty description, and
// zero or more subtasks.
t1_0_task
: TASK_TOKEN S* amount text
description
t1_0_subtask*
;
// A subtask in a v1.0 plan document. Only SUBTASK_NEW_TOKEN is accepted
// here; the PARTIAL/COMPLETE/OBSOLETE markers are only reachable through
// the amendment rules (a1_0_subtask_token).
t1_0_subtask
: SUBTASK_NEW_TOKEN S* amount text
description
;
// An amendment document: the literal amendment version line, one plan, then
// any number of trailing EOL tokens.
// As in takentaal_v1_0, no EOL separates the version line from the plan
// because PLAN_TOKEN itself begins with EOL+.
amendment_v1_0
: 'takentaal-amendment v1.0'
a1_0_plan
EOL*
;
// The plan of an amendment document: same shape as t1_0_plan (heading line,
// description, one or more tasks) but built from the amendment task rule.
a1_0_plan
: PLAN_TOKEN S* amount text
description
a1_0_task+
;
// A task in an amendment document: same shape as t1_0_task (heading line,
// description, zero or more subtasks) but built from the amendment subtask
// rule.
a1_0_task
: TASK_TOKEN S* amount text
description
a1_0_subtask*
;
// A subtask in an amendment document: one of the markers accepted by
// a1_0_subtask_token, then optional spaces, an optional amount, title text
// and a possibly empty description.
a1_0_subtask
: a1_0_subtask_token S* amount text
description
;
// The line-start markers that may open a subtask in an amendment:
// NEW ('-'), PARTIAL ('/'), COMPLETE ('*') or OBSOLETE ('!').
// Kept as a parser rule so the lexer tokens stay distinct and the plan
// grammar can accept SUBTASK_NEW_TOKEN alone.
a1_0_subtask_token
: SUBTASK_NEW_TOKEN
| SUBTASK_PARTIAL_TOKEN
| SUBTASK_COMPLETE_TOKEN
| SUBTASK_OBSOLETE_TOKEN
;
// Any implicit and explicit lexer token that may appear in a text should be listed
@ -30,87 +75,37 @@ text
: (INT | '{' | '}' | S | CHAR | WORD)+
;
plan
: PLAN_TOKEN S* amount text
description
task+
;
// Zero or more description lines, each introduced by an EOL token and
// followed by text. The rule may match nothing.
description : (EOL text)* ;
description
: (EOL text)*
;
// An amount in curly brackets, or nothing (the empty alternative makes the
// amount optional). Spaces are allowed between the brackets and the integer.
amount : '{' S* INT S* '}' | ;
task
: TASK_TOKEN S* amount text
description
subtask*
;
// Lexer rules
subtask
: SUBTASK_TOKEN S* amount text
description
;
PLAN_TOKEN : EOL+ '#' ;
amount
: '{' S* INT S* '}'
|
;
TASK_TOKEN : EOL+ '##' ;
PLAN_TOKEN
: EOL+ '#'
;
// These lexer tokens are combined with EOL so that they only match at the
// start of a line. Each token therefore also contains the preceding line
// break(s), which is why the parser rules place no EOL in front of them.
// '-' marks a new subtask (the only marker accepted in a plan document).
SUBTASK_NEW_TOKEN : EOL+ '-' ;
// '/' marks a partial subtask (amendment only).
SUBTASK_PARTIAL_TOKEN : EOL+ '/' ;
// '*' marks a complete subtask (amendment only).
SUBTASK_COMPLETE_TOKEN : EOL+ '*' ;
// '!' marks an obsolete subtask (amendment only).
SUBTASK_OBSOLETE_TOKEN : EOL+ '!' ;
TASK_TOKEN
: EOL+ '##'
;
S : ' ' ;
SUBTASK_TOKEN
: (SUBTASK_NEW_TOKEN | SUBTASK_PARTIAL_TOKEN | SUBTASK_COMPLETE_TOKEN | SUBTASK_OBSOLETE_TOKEN)
;
// No implicit whitespace handling
// Skips a single space character.
// NOTE(review): S is defined earlier and matches the same single space, so
// this rule presumably never produces a match — confirm and consider
// whether it is still needed.
WS : [ ] -> skip ;
SUBTASK_NEW_TOKEN
: EOL+ '-'
;
EOL : ' '* '\n'+ ;
SUBTASK_PARTIAL_TOKEN
: EOL+ '/'
;
INT : DIGIT+ ;
SUBTASK_COMPLETE_TOKEN
: EOL+ '*'
;
SUBTASK_OBSOLETE_TOKEN
: EOL+ '!'
;
S
: ' '
;
WS
: [ ] -> skip
;
EOL
: ' '* '\n'+
;
INT
: DIGIT+
;
fragment DIGIT
: [0-9]
;
fragment DIGIT : [0-9] ;
// Match printable characters, except space which is covered by S
CHAR
: [!-~\u00A0-\u33FF] // ASCII and UNICODE
;
CHAR : [!-~\u00A0-\u33FF] ; // ASCII and UNICODE
// This is a performance improvement that groups chars that do not have a special meaning
WORD
: [A-Za-z\u00A0-\u33FF]+
;
// One or more ASCII letters or characters in \u00A0-\u33FF, grouped into a
// single token. Every character accepted here is also accepted by CHAR.
// NOTE(review): with CHAR defined first, a one-character match presumably
// resolves to CHAR and only longer runs become WORD — confirm against the
// ANTLR lexer tie-breaking rules.
WORD : [A-Za-z\u00A0-\u33FF]+ ;