Add a grammar for an amendment

and remove the amendment-specific token from the plan grammar
2025-12-11 14:46:22 +00:00 · 2024-09-04 16:38:13 +02:00 · 2024-09-04 16:38:13 +02:00 · 304f90a0b5
commit 304f90a0b5
parent f1c236c751
4 changed files with 95 additions and 80 deletions
--- a/README.md
+++ b/README.md
@ -6,12 +6,12 @@
 nix develop
 # Inspect lexer
-antlr4-parse takentaal.g4 takentaal example -tokens
+antlr4-parse takentaal.g4 takentaal plan_example -tokens
 # Inspect parser
-antlr4-parse takentaal.g4 takentaal example -tree
+antlr4-parse takentaal.g4 takentaal plan_example -tree
-antlr4-parse takentaal.g4 takentaal example -gui
+antlr4-parse takentaal.g4 takentaal plan_example -gui
 # Generate TypeScript parser
 antlr4 -Dlanguage=TypeScript -o takentaal takentaal.g4
-```
+```
--- a/2
+++ b/2
@ -1,4 +1,4 @@
-takentaal v0.1.0
+takentaal-amendment v1.0
 #  {5000} Full work plan
 This is the (draft) description of the entire work plan.
--- a/20
+++ b/20
@ -0,0 +1,20 @@
 takentaal v1.0
 #  {5000} Full work plan
 This is the (draft) description of the entire work plan.
 ## {1000} First task
 This is the description of the first task.
 This description has # ' " [] symbols and ü © Ð Latin-1 chars.
 -   {500} First subtask
 -   {500} Second subtask
 ## {1000} Second task
 This is the description of the {} second task for project 10.
 -   {500} First subtask
 - {500} 2nd subtask
--- a/takentaal.g4
+++ b/takentaal.g4
@ -14,14 +14,59 @@
 grammar takentaal;
-takentaal
+takentaal : (takentaal_v1_0 | amendment_v1_0) EOF ;
-    : header
+
-      plan
+takentaal_v1_0
    : 'takentaal v1.0'
      t1_0_plan
      EOL*
    ;
-header
+t1_0_plan
-    : 'takentaal v0.1.0'
+    : PLAN_TOKEN S* amount text
      description
      t1_0_task+
    ;
 t1_0_task
    : TASK_TOKEN S* amount text
      description
      t1_0_subtask*
    ;
 t1_0_subtask
    : SUBTASK_NEW_TOKEN S* amount text
      description
    ;
 amendment_v1_0
    : 'takentaal-amendment v1.0'
      a1_0_plan
      EOL*
    ;
 a1_0_plan
    : PLAN_TOKEN S* amount text
      description
      a1_0_task+
    ;
 a1_0_task
    : TASK_TOKEN S* amount text
      description
      a1_0_subtask*
    ;
 a1_0_subtask
    : a1_0_subtask_token S* amount text
      description
    ;
 a1_0_subtask_token
    : SUBTASK_NEW_TOKEN
    | SUBTASK_PARTIAL_TOKEN
    | SUBTASK_COMPLETE_TOKEN
    | SUBTASK_OBSOLETE_TOKEN
    ;
 // Any implicit or explicit lexer token that may appear in a text should be listed
@ -30,87 +75,37 @@ text
    : (INT | '{' | '}' | S | CHAR | WORD)+
    ;
-plan
+description : (EOL text)* ;
    : PLAN_TOKEN S* amount text
      description
      task+
    ;
-description
+// An amount in curly brackets or nothing
-    : (EOL text)*
+amount : '{' S* INT S* '}' | ;
    ;
-task
+// Lexer rules
    : TASK_TOKEN S* amount text
      description
      subtask*
    ;
-subtask
+PLAN_TOKEN : EOL+ '#' ;
    : SUBTASK_TOKEN S* amount text
      description
    ;
-amount
+TASK_TOKEN : EOL+ '##' ;
    : '{' S* INT S* '}'
    |
    ;
-PLAN_TOKEN
+// These lexer tokens are combined with EOL so that they only match at the
-    : EOL+ '#'
+// start of a line.
-    ;
+SUBTASK_NEW_TOKEN      : EOL+ '-' ;
 SUBTASK_PARTIAL_TOKEN  : EOL+ '/' ;
 SUBTASK_COMPLETE_TOKEN : EOL+ '*' ;
 SUBTASK_OBSOLETE_TOKEN : EOL+ '!' ;
-TASK_TOKEN
+S : ' ' ;
    : EOL+ '##'
    ;
-SUBTASK_TOKEN
+// No implicit whitespace handling
-    : (SUBTASK_NEW_TOKEN | SUBTASK_PARTIAL_TOKEN | SUBTASK_COMPLETE_TOKEN | SUBTASK_OBSOLETE_TOKEN)
+WS : [ ] -> skip ;
    ;
-SUBTASK_NEW_TOKEN
+EOL : ' '* '\n'+ ;
    : EOL+ '-'
    ;
-SUBTASK_PARTIAL_TOKEN
+INT : DIGIT+ ;
    : EOL+ '/'
    ;
-SUBTASK_COMPLETE_TOKEN
+fragment DIGIT : [0-9] ;
    : EOL+ '*'
    ;
 SUBTASK_OBSOLETE_TOKEN
    : EOL+ '!'
    ;
 S
    : ' '
    ;
 WS
    : [ ] -> skip
    ;
 EOL
    : ' '* '\n'+
    ;
 INT
    : DIGIT+
    ;
 fragment DIGIT
    : [0-9]
    ;
 // Match printable characters, except space which is covered by S
-CHAR
+CHAR : [!-~\u00A0-\u33FF] ; // ASCII and UNICODE
    : [!-~\u00A0-\u33FF] // ASCII and UNICODE
    ;
 // This is a performance improvement that groups chars that do not have a special meaning
-WORD
+WORD : [A-Za-z\u00A0-\u33FF]+ ;
    : [A-Za-z\u00A0-\u33FF]+
    ;