diff --git a/example b/example index 9a10fc8..41968b7 100644 --- a/example +++ b/example @@ -1,7 +1,7 @@ takentaal v0.1.0 # {5000} Full work plan -This is the description of the entire work plan. +This is the (draft) description of the entire work plan. ## {1000} First task @@ -12,9 +12,9 @@ This description has # ' " [] symbols and ü © Ð Latin-1 chars. - {500} First subtask / {500} Second subtask -## Second task {1000} +## {1000} Second task This is the description of the second task. * {500} First subtask -- {500} Second subtask +- {500} \2nd subtask diff --git a/takentaal.g4 b/takentaal.g4 index 5679125..f8c8d83 100644 --- a/takentaal.g4 +++ b/takentaal.g4 @@ -1,3 +1,17 @@ +/** + * This file defines the grammar for takentaal. + * It is divided into parser rules (lowercase) and lexer rules (uppercase). + * The input is split into tokens according to the lexer rules. + * At any point, all lexer rules are considered. If multiple rules match, + * a lexer rule is chosen as follows (in order of priority): + * - the rule that matches the longest input is chosen + * - any implicit rule, e.g. 'a', is chosen + * - the first defined rule is chosen. + * Since this grammar has to match unquoted texts, and texts are usually longer + * than other token matches, the TEXT rule disallows many characters as the + * first character of a text. + */ + grammar takentaal; takentaal @@ -73,14 +87,14 @@ WS ; EOL - : '\n'+ + : ' '* '\n'+ ; INT : DIGIT+ ; -DIGIT +fragment DIGIT : [0-9] ; @@ -92,14 +106,22 @@ END_AMOUNT : '}' ; -STARTCHAR - : [!-"$-/:-\u007A\u007C\u007E] +// all special characters, including ' ' and digits, are subtracted from the printable character range +// '!' 
'#' '-' '/' '*' +fragment STARTCHAR + : ["$-)+-,.:-z|~\u00A0-\u33FF] ; -CHAR - : [ -\u007E\u00A0-\u33FF] // ASCII and UNICODE +// A text should not end with a space, so ENDCHAR omits the space. NOTE(review): the range starts at '"', so '!' is excluded as well - confirm this is intended +fragment ENDCHAR + : ["-~\u00A0-\u33FF] ; +fragment CHAR + : [ -~\u00A0-\u33FF] // ASCII and UNICODE + ; + +// A text cannot start with a special character; otherwise it has to be placed in quotes TEXT - : STARTCHAR CHAR* + : STARTCHAR (CHAR* ENDCHAR)? ;