mirror of
				https://codeberg.org/NLnet/takentaal.git
				synced 2025-10-26 17:39:22 +00:00 
			
		
		
		
	Add more characters that should be omitted from STARTCHAR
and add an explanation for why this is the case.
This commit is contained in:
		
							parent
							
								
									ecc5a1aa00
								
							
						
					
					
						commit
						c0b60b1c4f
					
				
					 2 changed files with 32 additions and 10 deletions
				
			
		
							
								
								
									
										6
									
								
								example
									
										
									
									
									
								
							
							
						
						
									
										6
									
								
								example
									
										
									
									
									
								
							|  | @ -1,7 +1,7 @@ | ||||||
| takentaal v0.1.0 | takentaal v0.1.0 | ||||||
| #  {5000} Full work plan | #  {5000} Full work plan | ||||||
| 
 | 
 | ||||||
| This is the description of the entire work plan. | This is the (draft) description of the entire work plan. | ||||||
| 
 | 
 | ||||||
| ## {1000} First task | ## {1000} First task | ||||||
| 
 | 
 | ||||||
|  | @ -12,9 +12,9 @@ This description has # ' " [] symbols and ü © Ð Latin-1 chars. | ||||||
| -   {500} First subtask | -   {500} First subtask | ||||||
| /   {500} Second subtask | /   {500} Second subtask | ||||||
| 
 | 
 | ||||||
| ## Second task {1000} | ## {1000} Second task | ||||||
| 
 | 
 | ||||||
| This is the description of the second task. | This is the description of the second task. | ||||||
| 
 | 
 | ||||||
| *   {500} First subtask | *   {500} First subtask | ||||||
| - {500} Second subtask | - {500} \2nd subtask | ||||||
|  |  | ||||||
							
								
								
									
										36
									
								
								takentaal.g4
									
										
									
									
									
								
							
							
						
						
									
										36
									
								
								takentaal.g4
									
										
									
									
									
								
							|  | @ -1,3 +1,17 @@ | ||||||
|  | /** | ||||||
|  |  * This file defines the grammar for takentaal. | ||||||
|  |  * It is divided into parser rules (lowercase) and lexer rules (uppercase). | ||||||
|  |  * The parser splits an input into tokens according to the lexer rules. | ||||||
|  |  * At any point, all lexer rules are considered. If multiple rules match, | ||||||
|  |  * a lexer rule is chosen as follows: | ||||||
|  |  *  - the rule that matches the longest input is chosen | ||||||
|  |  *  - any implicit rule, e.g. 'a', is chosen | ||||||
|  |  *  - the first defined rule is chosen. | ||||||
|  |  * Since this grammar has to match unquoted texts and texts are usually longer | ||||||
|  |  * than other token matches, the TEXT rule disallows many characters as the | ||||||
|  |  * first character to start with. | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
| grammar takentaal; | grammar takentaal; | ||||||
| 
 | 
 | ||||||
| takentaal | takentaal | ||||||
|  | @ -73,14 +87,14 @@ WS | ||||||
|     ; |     ; | ||||||
| 
 | 
 | ||||||
| EOL | EOL | ||||||
|     : '\n'+ |     : ' '* '\n'+ | ||||||
|     ; |     ; | ||||||
| 
 | 
 | ||||||
| INT | INT | ||||||
|     : DIGIT+ |     : DIGIT+ | ||||||
|     ; |     ; | ||||||
| 
 | 
 | ||||||
| DIGIT | fragment DIGIT | ||||||
|     : [0-9] |     : [0-9] | ||||||
|     ; |     ; | ||||||
| 
 | 
 | ||||||
|  | @ -92,14 +106,22 @@ END_AMOUNT | ||||||
|     : '}' |     : '}' | ||||||
|     ; |     ; | ||||||
| 
 | 
 | ||||||
| STARTCHAR | // all special characters, including ' ' and digits are subtracted from the printable character range | ||||||
|     : [!-"$-/:-\u007A\u007C\u007E] | // '!' '#' '-' '/' '*' | ||||||
|  | fragment STARTCHAR | ||||||
|  |     : ["$-)+-,.:-z|~\u00A0-\u33FF] | ||||||
|     ; |     ; | ||||||
| 
 | 
 | ||||||
| CHAR | // A text should not end with a space, so the ENDCHAR omits the space | ||||||
|     : [ -\u007E\u00A0-\u33FF] // ASCII and UNICODE | fragment ENDCHAR | ||||||
|  |     : ["-~\u00A0-\u33FF] | ||||||
|     ; |     ; | ||||||
| 
 | 
 | ||||||
|  | fragment CHAR | ||||||
|  |     : [ -~\u00A0-\u33FF] // ASCII and UNICODE | ||||||
|  |     ; | ||||||
|  | 
 | ||||||
|  | // A text cannot start with a special character or has to be placed in quotes | ||||||
| TEXT | TEXT | ||||||
|     : STARTCHAR CHAR* |     : STARTCHAR (CHAR* ENDCHAR)? | ||||||
|     ; |     ; | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue