mirror of
https://codeberg.org/NLnet/takentaal.git
synced 2025-08-30 14:30:08 +00:00
Split description text into character tokens so that text has a lower ranking
This commit is contained in:
parent
c0b60b1c4f
commit
f9e92b8152
1 changed file with 21 additions and 27 deletions
48
takentaal.g4
48
takentaal.g4
|
@ -8,8 +8,8 @@
|
|||
* - any implicit rule, e.g. 'a', is chosen
|
||||
* - the first defined rule is chosen.
|
||||
* Since this grammar has to match unquoted texts and texts are usually longer
|
||||
* than other token matches, the TEXT rule disallows many characters as the
|
||||
* first character to start with.
|
||||
* than other token matches, texts are split into characters so that they have
|
||||
* a lower ranking.
|
||||
*/
|
||||
|
||||
grammar takentaal;
|
||||
|
@ -20,27 +20,31 @@ takentaal
|
|||
;
|
||||
|
||||
header
|
||||
: 'takentaal v0.1.0' EOL
|
||||
: 'takentaal v0.1.0'
|
||||
;
|
||||
|
||||
text
|
||||
: (S | CHAR | WORD)+
|
||||
;
|
||||
|
||||
plan
|
||||
: PLAN_TOKEN S* amount TEXT EOL
|
||||
: PLAN_TOKEN S* amount text
|
||||
description
|
||||
task+
|
||||
;
|
||||
|
||||
description
|
||||
: (TEXT EOL)*
|
||||
: (EOL text)*
|
||||
;
|
||||
|
||||
task
|
||||
: TASK_TOKEN S* amount TEXT EOL
|
||||
: TASK_TOKEN S* amount text
|
||||
description
|
||||
subtask*
|
||||
;
|
||||
|
||||
subtask
|
||||
: SUBTASK_TOKEN S* amount TEXT EOL
|
||||
: SUBTASK_TOKEN S* amount text
|
||||
description
|
||||
;
|
||||
|
||||
|
@ -50,15 +54,15 @@ amount
|
|||
;
|
||||
|
||||
PLAN_TOKEN
|
||||
: '#'
|
||||
: EOL+ '#'
|
||||
;
|
||||
|
||||
TASK_TOKEN
|
||||
: '##'
|
||||
: EOL+ '##'
|
||||
;
|
||||
|
||||
SUBTASK_TOKEN
|
||||
: (SUBTASK_NEW_TOKEN | SUBTASK_PARTIAL_TOKEN | SUBTASK_COMPLETE_TOKEN | SUBTASK_OBSOLETE_TOKEN)
|
||||
: EOL+ (SUBTASK_NEW_TOKEN | SUBTASK_PARTIAL_TOKEN | SUBTASK_COMPLETE_TOKEN | SUBTASK_OBSOLETE_TOKEN)
|
||||
;
|
||||
|
||||
SUBTASK_NEW_TOKEN
|
||||
|
@ -78,7 +82,7 @@ SUBTASK_OBSOLETE_TOKEN
|
|||
;
|
||||
|
||||
S
|
||||
: ' ' -> skip
|
||||
: ' '
|
||||
;
|
||||
|
||||
|
||||
|
@ -106,22 +110,12 @@ END_AMOUNT
|
|||
: '}'
|
||||
;
|
||||
|
||||
// all special characters, including ' ' and digits, are subtracted from the printable character range
|
||||
// '!' '#' '-' '/' '*'
|
||||
fragment STARTCHAR
|
||||
: ["$-)+-,.:-z|~\u00A0-\u33FF]
|
||||
// Match printable characters, except space which is covered by S
|
||||
CHAR
|
||||
: [!-~\u00A0-\u33FF] // ASCII and UNICODE
|
||||
;
|
||||
|
||||
// A text should not end with a space, so ENDCHAR omits the space
|
||||
fragment ENDCHAR
|
||||
: ["-~\u00A0-\u33FF]
|
||||
;
|
||||
|
||||
fragment CHAR
|
||||
: [ -~\u00A0-\u33FF] // ASCII and UNICODE
|
||||
;
|
||||
|
||||
// A text cannot start with a special character; otherwise it has to be placed in quotes
|
||||
TEXT
|
||||
: STARTCHAR (CHAR* ENDCHAR)?
|
||||
// This is a performance improvement that groups chars that do not have a special meaning
|
||||
WORD
|
||||
: [A-Za-z\u00A0-\u33FF]+
|
||||
;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue