mirror of
https://codeberg.org/NLnet/takentaal.git
synced 2025-08-29 22:10:07 +00:00
Add more characters that should be omitted from STARTCHAR
and add an explanation for why this is the case.
This commit is contained in:
parent
ecc5a1aa00
commit
c0b60b1c4f
2 changed files with 32 additions and 10 deletions
6
example
6
example
|
@ -1,7 +1,7 @@
|
||||||
takentaal v0.1.0
|
takentaal v0.1.0
|
||||||
# {5000} Full work plan
|
# {5000} Full work plan
|
||||||
|
|
||||||
This is the description of the entire work plan.
|
This is the (draft) description of the entire work plan.
|
||||||
|
|
||||||
## {1000} First task
|
## {1000} First task
|
||||||
|
|
||||||
|
@ -12,9 +12,9 @@ This description has # ' " [] symbols and ü © Ð Latin-1 chars.
|
||||||
- {500} First subtask
|
- {500} First subtask
|
||||||
/ {500} Second subtask
|
/ {500} Second subtask
|
||||||
|
|
||||||
## Second task {1000}
|
## {1000} Second task
|
||||||
|
|
||||||
This is the description of the second task.
|
This is the description of the second task.
|
||||||
|
|
||||||
* {500} First subtask
|
* {500} First subtask
|
||||||
- {500} Second subtask
|
- {500} \2nd subtask
|
||||||
|
|
36
takentaal.g4
36
takentaal.g4
|
@ -1,3 +1,17 @@
|
||||||
|
/**
|
||||||
|
* This file defines the grammar for takentaal.
|
||||||
|
* It is divided into parser rules (lowercase) and lexer rules (uppercase).
|
||||||
|
* The parser splits an input into tokens according to the lexer rules.
|
||||||
|
* At any point, all lexer rules are considered. If multiple rules match,
|
||||||
|
* a lexer rule is chosen as follows:
|
||||||
|
* - the rule that matches the longest input is chosen
|
||||||
|
* - any implicit rule, e.g. 'a', is chosen
|
||||||
|
* - the first defined rule is chosen.
|
||||||
|
* Since this grammar has to match unquoted texts and texts are usually longer
|
||||||
|
* than other token matches, the TEXT rule disallows many characters as the
|
||||||
|
* first character to start with.
|
||||||
|
*/
|
||||||
|
|
||||||
grammar takentaal;
|
grammar takentaal;
|
||||||
|
|
||||||
takentaal
|
takentaal
|
||||||
|
@ -73,14 +87,14 @@ WS
|
||||||
;
|
;
|
||||||
|
|
||||||
EOL
|
EOL
|
||||||
: '\n'+
|
: ' '* '\n'+
|
||||||
;
|
;
|
||||||
|
|
||||||
INT
|
INT
|
||||||
: DIGIT+
|
: DIGIT+
|
||||||
;
|
;
|
||||||
|
|
||||||
DIGIT
|
fragment DIGIT
|
||||||
: [0-9]
|
: [0-9]
|
||||||
;
|
;
|
||||||
|
|
||||||
|
@ -92,14 +106,22 @@ END_AMOUNT
|
||||||
: '}'
|
: '}'
|
||||||
;
|
;
|
||||||
|
|
||||||
STARTCHAR
|
// all special characters, including ' ' and digits are subtracted from the printable character range
|
||||||
: [!-"$-/:-\u007A\u007C\u007E]
|
// '!' '#' '-' '/' '*'
|
||||||
|
fragment STARTCHAR
|
||||||
|
: ["$-)+-,.:-z|~\u00A0-\u33FF]
|
||||||
;
|
;
|
||||||
|
|
||||||
CHAR
|
// A text should not end with a space, so the ENDCHAR omits the space
|
||||||
: [ -\u007E\u00A0-\u33FF] // ASCII and UNICODE
|
fragment ENDCHAR
|
||||||
|
: ["-~\u00A0-\u33FF]
|
||||||
;
|
;
|
||||||
|
|
||||||
|
fragment CHAR
|
||||||
|
: [ -~\u00A0-\u33FF] // ASCII and UNICODE
|
||||||
|
;
|
||||||
|
|
||||||
|
// A text must not start with a special character; otherwise it has to be placed in quotes
|
||||||
TEXT
|
TEXT
|
||||||
: STARTCHAR CHAR*
|
: STARTCHAR (CHAR* ENDCHAR)?
|
||||||
;
|
;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue