Add a grammar for an amendment

and remove the amendment-specific token from the plan grammar
This commit is contained in:
Jos van den Oever 2024-09-04 16:38:13 +02:00
parent f1c236c751
commit 304f90a0b5
4 changed files with 95 additions and 80 deletions

View file

@ -6,12 +6,12 @@
nix develop nix develop
# Inspect lexer # Inspect lexer
antlr4-parse takentaal.g4 takentaal example -tokens antlr4-parse takentaal.g4 takentaal plan_example -tokens
# Inspect parser # Inspect parser
antlr4-parse takentaal.g4 takentaal example -tree antlr4-parse takentaal.g4 takentaal plan_example -tree
antlr4-parse takentaal.g4 takentaal example -gui antlr4-parse takentaal.g4 takentaal plan_example -gui
# Generate TypeScript parser # Generate TypeScript parser
antlr4 -Dlanguage=TypeScript -o takentaal takentaal.g4 antlr4 -Dlanguage=TypeScript -o takentaal takentaal.g4
``` ```

View file

@ -1,4 +1,4 @@
takentaal v0.1.0 takentaal-amendment v1.0
# {5000} Full work plan # {5000} Full work plan
This is the (draft) description of the entire work plan. This is the (draft) description of the entire work plan.

20
plan_example Normal file
View file

@ -0,0 +1,20 @@
takentaal v1.0
# {5000} Full work plan
This is the (draft) description of the entire work plan.
## {1000} First task
This is the description of the first task.
This description has # ' " [] symbols and ü © Ð Latin-1 chars.
- {500} First subtask
- {500} Second subtask
## {1000} Second task
This is the description of the {} second task for project 10.
- {500} First subtask
- {500} 2nd subtask

View file

@ -14,14 +14,59 @@
grammar takentaal; grammar takentaal;
takentaal takentaal : (takentaal_v1_0 | amendment_v1_0) EOF ;
: header
plan takentaal_v1_0
: 'takentaal v1.0'
t1_0_plan
EOL* EOL*
; ;
header t1_0_plan
: 'takentaal v0.1.0' : PLAN_TOKEN S* amount text
description
t1_0_task+
;
t1_0_task
: TASK_TOKEN S* amount text
description
t1_0_subtask*
;
t1_0_subtask
: SUBTASK_NEW_TOKEN S* amount text
description
;
amendment_v1_0
: 'takentaal-amendment v1.0'
a1_0_plan
EOL*
;
a1_0_plan
: PLAN_TOKEN S* amount text
description
a1_0_task+
;
a1_0_task
: TASK_TOKEN S* amount text
description
a1_0_subtask*
;
a1_0_subtask
: a1_0_subtask_token S* amount text
description
;
a1_0_subtask_token
: SUBTASK_NEW_TOKEN
| SUBTASK_PARTIAL_TOKEN
| SUBTASK_COMPLETE_TOKEN
| SUBTASK_OBSOLETE_TOKEN
; ;
// Any implicit and explicit lexer token that may appear in a text should be listed // Any implicit and explicit lexer token that may appear in a text should be listed
@ -30,87 +75,37 @@ text
: (INT | '{' | '}' | S | CHAR | WORD)+ : (INT | '{' | '}' | S | CHAR | WORD)+
; ;
plan description : (EOL text)* ;
: PLAN_TOKEN S* amount text
description
task+
;
description // An amount in curly brackets or nothing
: (EOL text)* amount : '{' S* INT S* '}' | ;
;
task // Lexer rules
: TASK_TOKEN S* amount text
description
subtask*
;
subtask PLAN_TOKEN : EOL+ '#' ;
: SUBTASK_TOKEN S* amount text
description
;
amount TASK_TOKEN : EOL+ '##' ;
: '{' S* INT S* '}'
|
;
PLAN_TOKEN // These lexer tokens are combined with EOL so that they only match at the
: EOL+ '#' // start of a line.
; SUBTASK_NEW_TOKEN : EOL+ '-' ;
SUBTASK_PARTIAL_TOKEN : EOL+ '/' ;
SUBTASK_COMPLETE_TOKEN : EOL+ '*' ;
SUBTASK_OBSOLETE_TOKEN : EOL+ '!' ;
TASK_TOKEN S : ' ' ;
: EOL+ '##'
;
SUBTASK_TOKEN // No implicit whitespace handling
: (SUBTASK_NEW_TOKEN | SUBTASK_PARTIAL_TOKEN | SUBTASK_COMPLETE_TOKEN | SUBTASK_OBSOLETE_TOKEN) WS : [ ] -> skip ;
;
SUBTASK_NEW_TOKEN EOL : ' '* '\n'+ ;
: EOL+ '-'
;
SUBTASK_PARTIAL_TOKEN INT : DIGIT+ ;
: EOL+ '/'
;
SUBTASK_COMPLETE_TOKEN fragment DIGIT : [0-9] ;
: EOL+ '*'
;
SUBTASK_OBSOLETE_TOKEN
: EOL+ '!'
;
S
: ' '
;
WS
: [ ] -> skip
;
EOL
: ' '* '\n'+
;
INT
: DIGIT+
;
fragment DIGIT
: [0-9]
;
// Match printable characters, except space which is covered by S // Match printable characters, except space which is covered by S
CHAR CHAR : [!-~\u00A0-\u33FF] ; // ASCII and UNICODE
: [!-~\u00A0-\u33FF] // ASCII and UNICODE
;
// This is a performance improvement that groups chars that do not have a special meaning // This is a performance improvement that groups chars that do not have a special meaning
WORD WORD : [A-Za-z\u00A0-\u33FF]+ ;
: [A-Za-z\u00A0-\u33FF]+
;