Add a grammar for an amendment

and remove the amendment-specific token from the plan grammar
2025-12-11 14:46:22 +00:00 · 2024-09-04 16:38:13 +02:00 · 2024-09-04 16:38:13 +02:00 · 304f90a0b5
commit 304f90a0b5
parent f1c236c751
4 changed files with 95 additions and 80 deletions
--- a/README.md
+++ b/README.md
@@ -6,11 +6,11 @@
 nix develop

 # Inspect lexer
-antlr4-parse takentaal.g4 takentaal example -tokens
+antlr4-parse takentaal.g4 takentaal plan_example -tokens

 # Inspect parser
-antlr4-parse takentaal.g4 takentaal example -tree
-antlr4-parse takentaal.g4 takentaal example -gui
+antlr4-parse takentaal.g4 takentaal plan_example -tree
+antlr4-parse takentaal.g4 takentaal plan_example -gui

 # Generate TypeScript parser
 antlr4 -Dlanguage=TypeScript -o takentaal takentaal.g4
--- a/2
+++ b/2
@@ -1,4 +1,4 @@
-takentaal v0.1.0
+takentaal-amendment v1.0
 #  {5000} Full work plan

 This is the (draft) description of the entire work plan.
--- a/20
+++ b/20
@@ -0,0 +1,20 @@
+takentaal v1.0
+#  {5000} Full work plan
+
+This is the (draft) description of the entire work plan.
+
+## {1000} First task
+
+This is the description of the first task.
+
+This description has # ' " [] symbols and ü © Ð Latin-1 chars.
+
+-   {500} First subtask
+-   {500} Second subtask
+
+## {1000} Second task
+
+This is the description of the {} second task for project 10.
+
+-   {500} First subtask
+- {500} 2nd subtask
--- a/takentaal.g4
+++ b/takentaal.g4
@@ -14,14 +14,59 @@

 grammar takentaal;

-takentaal
-    : header
-      plan
+takentaal : (takentaal_v1_0 | amendment_v1_0) EOF ;
+
+takentaal_v1_0
+    : 'takentaal v1.0'
+      t1_0_plan
      EOL*
    ;

-header
-    : 'takentaal v0.1.0'
+t1_0_plan
+    : PLAN_TOKEN S* amount text
+      description
+      t1_0_task+
+    ;
+
+t1_0_task
+    : TASK_TOKEN S* amount text
+      description
+      t1_0_subtask*
+    ;
+
+t1_0_subtask
+    : SUBTASK_NEW_TOKEN S* amount text
+      description
+    ;
+
+amendment_v1_0
+    : 'takentaal-amendment v1.0'
+      a1_0_plan
+      EOL*
+    ;
+
+a1_0_plan
+    : PLAN_TOKEN S* amount text
+      description
+      a1_0_task+
+    ;
+
+a1_0_task
+    : TASK_TOKEN S* amount text
+      description
+      a1_0_subtask*
+    ;
+
+a1_0_subtask
+    : a1_0_subtask_token S* amount text
+      description
+    ;
+
+a1_0_subtask_token
+    : SUBTASK_NEW_TOKEN
+    | SUBTASK_PARTIAL_TOKEN
+    | SUBTASK_COMPLETE_TOKEN
+    | SUBTASK_OBSOLETE_TOKEN
    ;

 // Any implicit and explicit lexer token that may appear in a text should be listed
@@ -30,87 +75,37 @@ text
    : (INT | '{' | '}' | S | CHAR | WORD)+
    ;

-plan
-    : PLAN_TOKEN S* amount text
-      description
-      task+
-    ;
+description : (EOL text)* ;

-description
-    : (EOL text)*
-    ;
+// An amount in curly brackets, or nothing (the amount is optional)
+amount : '{' S* INT S* '}' | ;

-task
-    : TASK_TOKEN S* amount text
-      description
-      subtask*
-    ;
+// Lexer rules

-subtask
-    : SUBTASK_TOKEN S* amount text
-      description
-    ;
+PLAN_TOKEN : EOL+ '#' ;

-amount
-    : '{' S* INT S* '}'
-    |
-    ;
+TASK_TOKEN : EOL+ '##' ;

-PLAN_TOKEN
-    : EOL+ '#'
-    ;
+// These lexer tokens are combined with EOL so that they only match at the
+// start of a line.
+SUBTASK_NEW_TOKEN      : EOL+ '-' ;
+SUBTASK_PARTIAL_TOKEN  : EOL+ '/' ;
+SUBTASK_COMPLETE_TOKEN : EOL+ '*' ;
+SUBTASK_OBSOLETE_TOKEN : EOL+ '!' ;

-TASK_TOKEN
-    : EOL+ '##'
-    ;
+S : ' ' ;

-SUBTASK_TOKEN
-    : (SUBTASK_NEW_TOKEN | SUBTASK_PARTIAL_TOKEN | SUBTASK_COMPLETE_TOKEN | SUBTASK_OBSOLETE_TOKEN)
-    ;
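+// No implicit whitespace handling: spaces are emitted as explicit S tokens.
+// S is declared before WS and matches the same single space, so S takes
+// precedence over this skip rule.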
+WS : [ ] -> skip ;

-SUBTASK_NEW_TOKEN
-    : EOL+ '-'
-    ;
+EOL : ' '* '\n'+ ;

-SUBTASK_PARTIAL_TOKEN
-    : EOL+ '/'
-    ;
+INT : DIGIT+ ;

-SUBTASK_COMPLETE_TOKEN
-    : EOL+ '*'
-    ;
-
-SUBTASK_OBSOLETE_TOKEN
-    : EOL+ '!'
-    ;
-
-S
-    : ' '
-    ;
-    
-
-WS
-    : [ ] -> skip
-    ;
-
-EOL
-    : ' '* '\n'+
-    ;
-
-INT
-    : DIGIT+
-    ;
-
-fragment DIGIT
-    : [0-9]
-    ;
+fragment DIGIT : [0-9] ;

 // Match printable characters, except space which is covered by S
-CHAR
-    : [!-~\u00A0-\u33FF] // ASCII and UNICODE
-    ;
+CHAR : [!-~\u00A0-\u33FF] ; // ASCII and UNICODE

 // This is a performance improvement that groups chars that do not have a special meaning
-WORD
-    : [A-Za-z\u00A0-\u33FF]+
-    ;
+WORD : [A-Za-z\u00A0-\u33FF]+ ;