mirror of
https://codeberg.org/NLnet/takentaal.git
synced 2025-08-30 14:30:08 +00:00
Split description text into character tokens so that text has a lower ranking
This commit is contained in:
parent
c0b60b1c4f
commit
f9e92b8152
1 changed files with 21 additions and 27 deletions
48
takentaal.g4
48
takentaal.g4
|
@ -8,8 +8,8 @@
|
||||||
* - any implicit rule, e.g. 'a', is chosen
|
* - any implicit rule, e.g. 'a', is chosen
|
||||||
* - the first defined rule is chosen.
|
* - the first defined rule is chosen.
|
||||||
* Since this grammar has to match unquoted texts and texts are usually longer
|
* Since this grammar has to match unquoted texts and texts are usually longer
|
||||||
* than other token matches, the TEXT rule disallows many characters as the
|
* than other token matches, texts are split into characters so that they have
|
||||||
* first character to start with.
|
* a lower ranking.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
grammar takentaal;
|
grammar takentaal;
|
||||||
|
@ -20,27 +20,31 @@ takentaal
|
||||||
;
|
;
|
||||||
|
|
||||||
header
|
header
|
||||||
: 'takentaal v0.1.0' EOL
|
: 'takentaal v0.1.0'
|
||||||
|
;
|
||||||
|
|
||||||
|
text
|
||||||
|
: (S | CHAR | WORD)+
|
||||||
;
|
;
|
||||||
|
|
||||||
plan
|
plan
|
||||||
: PLAN_TOKEN S* amount TEXT EOL
|
: PLAN_TOKEN S* amount text
|
||||||
description
|
description
|
||||||
task+
|
task+
|
||||||
;
|
;
|
||||||
|
|
||||||
description
|
description
|
||||||
: (TEXT EOL)*
|
: (EOL text)*
|
||||||
;
|
;
|
||||||
|
|
||||||
task
|
task
|
||||||
: TASK_TOKEN S* amount TEXT EOL
|
: TASK_TOKEN S* amount text
|
||||||
description
|
description
|
||||||
subtask*
|
subtask*
|
||||||
;
|
;
|
||||||
|
|
||||||
subtask
|
subtask
|
||||||
: SUBTASK_TOKEN S* amount TEXT EOL
|
: SUBTASK_TOKEN S* amount text
|
||||||
description
|
description
|
||||||
;
|
;
|
||||||
|
|
||||||
|
@ -50,15 +54,15 @@ amount
|
||||||
;
|
;
|
||||||
|
|
||||||
PLAN_TOKEN
|
PLAN_TOKEN
|
||||||
: '#'
|
: EOL+ '#'
|
||||||
;
|
;
|
||||||
|
|
||||||
TASK_TOKEN
|
TASK_TOKEN
|
||||||
: '##'
|
: EOL+ '##'
|
||||||
;
|
;
|
||||||
|
|
||||||
SUBTASK_TOKEN
|
SUBTASK_TOKEN
|
||||||
: (SUBTASK_NEW_TOKEN | SUBTASK_PARTIAL_TOKEN | SUBTASK_COMPLETE_TOKEN | SUBTASK_OBSOLETE_TOKEN)
|
: EOL+ (SUBTASK_NEW_TOKEN | SUBTASK_PARTIAL_TOKEN | SUBTASK_COMPLETE_TOKEN | SUBTASK_OBSOLETE_TOKEN)
|
||||||
;
|
;
|
||||||
|
|
||||||
SUBTASK_NEW_TOKEN
|
SUBTASK_NEW_TOKEN
|
||||||
|
@ -78,7 +82,7 @@ SUBTASK_OBSOLETE_TOKEN
|
||||||
;
|
;
|
||||||
|
|
||||||
S
|
S
|
||||||
: ' ' -> skip
|
: ' '
|
||||||
;
|
;
|
||||||
|
|
||||||
|
|
||||||
|
@ -106,22 +110,12 @@ END_AMOUNT
|
||||||
: '}'
|
: '}'
|
||||||
;
|
;
|
||||||
|
|
||||||
// all special characters, including ' ' and digits, are subtracted from the printable character range
|
// Match printable characters, except space which is covered by S
|
||||||
// '!' '#' '-' '/' '*'
|
CHAR
|
||||||
fragment STARTCHAR
|
: [!-~\u00A0-\u33FF] // ASCII and UNICODE
|
||||||
: ["$-)+-,.:-z|~\u00A0-\u33FF]
|
|
||||||
;
|
;
|
||||||
|
|
||||||
// A text should not end with a space, so the ENDCHAR omits the space
|
// This is a performance improvement that groups chars that do not have a special meaning
|
||||||
fragment ENDCHAR
|
WORD
|
||||||
: ["-~\u00A0-\u33FF]
|
: [A-Za-z\u00A0-\u33FF]+
|
||||||
;
|
|
||||||
|
|
||||||
fragment CHAR
|
|
||||||
: [ -~\u00A0-\u33FF] // ASCII and UNICODE
|
|
||||||
;
|
|
||||||
|
|
||||||
// A text cannot start with a special character unless it is placed in quotes
|
|
||||||
TEXT
|
|
||||||
: STARTCHAR (CHAR* ENDCHAR)?
|
|
||||||
;
|
;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue