/*
============================================================================
Name : CTokens.g
Author : luqi
Version : 0.1
Copyright : Your copyright notice
Description : C99 lexer - tested against <C99.pdf 6.4>
============================================================================
*/
// Combined grammar: the parser rules (lower-case names) and the lexer rules
// (upper-case names) are both declared in this one file.
grammar CTokens;
options {
// Target language for the generated code.
language = Java;
// Base class for the generated parser (imported in the @header section).
superClass = DebugParser;
// NOTE(review): disabled option; presumably intended to give the lexer a
// DebugLexer base class - confirm whether it is still needed.
//@ superClass = DebugLexer;
}
@header
{
package c99.ctokens;
import util.DebugParser;
}
@lexer::header
{
package c99.ctokens;
import util.DebugLexer;
}
// Entry rule: consumes every token in the input and then requires end of file.
// Matching EOF explicitly keeps the parser from stopping silently after the
// first token and ignoring whatever follows it. A single-token input, which
// the previous definition accepted, is still accepted.
prog : token* EOF
;
// Matches one lexical token and prints its category together with its text.
// Punctuators are not handled (see the disabled alternative at the end).
token
    : kw=KEYWORD
      { System.out.println("Meet KEYWORD: " + $kw.text); }
    | ident=IDENTIFIER
      { System.out.println("Meet IDENTIFIER: " + $ident.text); }
    | cst=CONSTANT
      { System.out.println("Meet CONSTANT: " + $cst.text); }
    | str=STRING_LITERAL
      { System.out.println("Meet STRING_LITERAL: " + $str.text); }
    //| PUNCTUATOR
    ;
// Parser-level wrapper around a single KEYWORD token.
keyword
    : KEYWORD
    ;
// Parser-level wrapper around a single IDENTIFIER token.
identifier
    : IDENTIFIER
    ;
// Parser-level wrapper around a single CONSTANT token.
constant
    : CONSTANT
    ;
// Parser-level wrapper around a single STRING_LITERAL token.
string_literal
    : STRING_LITERAL
    ;
/*
==========================================================================================================
*/
// Reserved words, one row per initial letter; alternative order is unchanged.
KEYWORD
    : 'auto'
    | 'break'
    | 'case'     | 'char'     | 'const'    | 'continue'
    | 'default'  | 'do'       | 'double'
    | 'else'     | 'enum'     | 'extern'
    | 'float'    | 'for'
    | 'goto'
    | 'if'       | 'inline'   | 'int'
    | 'long'
    | 'register' | 'restrict' | 'return'
    | 'short'    | 'signed'   | 'sizeof'   | 'static' | 'struct' | 'switch'
    | 'typedef'
    | 'union'    | 'unsigned'
    | 'void'     | 'volatile'
    | 'while'
    | '_Bool'    | '_Complex' | '_Imaginary'
    ;
// An identifier: one non-digit start character followed by any mix of
// digits and further non-digit characters.
IDENTIFIER
    : IDENTIFIER_NONDIGIT ( DIGIT | IDENTIFIER_NONDIGIT )*
    ;
// A character allowed anywhere in an identifier other than a digit:
// a plain NONDIGIT or a universal character name.
fragment
IDENTIFIER_NONDIGIT
    : NONDIGIT
    | UNIVERSAL_CHARACTER_NAME
    ;
// Underscore or an ASCII letter of either case.
fragment
NONDIGIT
    : '_'
    | 'A' .. 'Z'
    | 'a' .. 'z'
    ;
// A single decimal digit.
fragment
DIGIT
    : ( '0' .. '9' )
    ;
// Universal character name: backslash, then 'u' with one hex quad or
// 'U' with two hex quads.
fragment
UNIVERSAL_CHARACTER_NAME
    : '\\' ( 'u' HEX_QUAD
           | 'U' HEX_QUAD HEX_QUAD
           )
    ;
// Exactly four hexadecimal digits; building block of UNIVERSAL_CHARACTER_NAME.
// Declared as a fragment so it never becomes a token of its own: as a
// free-standing token rule it overlapped with CONSTANT and IDENTIFIER on
// four-character inputs such as 1234 or abcd.
fragment
HEX_QUAD : HEXADECIMAL_DIGIT HEXADECIMAL_DIGIT HEXADECIMAL_DIGIT HEXADECIMAL_DIGIT
;
// Any constant token: integer, floating or character.
// Enumeration constants are not lexed here (alternative left disabled).
CONSTANT
    : INTEGER_CONSTANT
    | FLOATING_CONSTANT
    //| ENUMERATION_CONSTANT
    | CHARACTER_CONSTANT
    ;
// Decimal, octal or hexadecimal integer, each with an optional suffix.
fragment
INTEGER_CONSTANT
    : ( DECIMAL_CONSTANT
      | OCTAL_CONSTANT
      | HEXADECIMAL_CONSTANT
      )
      ( INTEGER_SUFFIX )?
    ;
// Integer suffix. The four alternatives are, in order:
//   unsigned, optionally followed by long
//   unsigned followed by long-long
//   long, optionally followed by unsigned
//   long-long, optionally followed by unsigned
// The alternatives share prefixes (u / l vs ll), so they are left exactly as
// written rather than factored.
fragment
INTEGER_SUFFIX : UNSIGNED_SUFFIX LONG_SUFFIX ?
| UNSIGNED_SUFFIX LONG_LONG_SUFFIX
| LONG_SUFFIX UNSIGNED_SUFFIX ?
| LONG_LONG_SUFFIX UNSIGNED_SUFFIX ?
;
// The unsigned marker, in either case.
fragment
UNSIGNED_SUFFIX
    : ( 'u' | 'U' )
    ;
// The long marker, in either case.
fragment
LONG_SUFFIX
    : ( 'l' | 'L' )
    ;
// The long-long marker; both letters must have the same case.
fragment
LONG_LONG_SUFFIX
    : ( 'll' | 'LL' )
    ;
// A decimal integer: a non-zero leading digit followed by any digits.
fragment
DECIMAL_CONSTANT
    : NONZERO_DIGIT DIGIT*
    ;
// An octal integer: a leading zero followed by any octal digits
// (so a lone 0 is matched here).
fragment
OCTAL_CONSTANT
    : '0' OCTAL_DIGIT*
    ;
// A hexadecimal integer: the 0x/0X prefix and at least one hex digit.
fragment
HEXADECIMAL_CONSTANT
    : HEXADECIMAL_PREFIX HEXADECIMAL_DIGIT+
    ;
// The 0x / 0X prefix of hexadecimal integer and floating constants.
// Declared as a fragment: it is only a building block of those constants and
// must not be emitted as a token when a bare "0x" appears in the input.
fragment
HEXADECIMAL_PREFIX : '0x'
| '0X'
;
// A decimal digit other than zero.
fragment
NONZERO_DIGIT
    : ( '1' .. '9' )
    ;
// A single octal digit.
fragment
OCTAL_DIGIT
    : ( '0' .. '7' )
    ;
// A single hexadecimal digit; letters are accepted in either case.
fragment
HEXADECIMAL_DIGIT
    : 'A' .. 'F'
    | 'a' .. 'f'
    | '0' .. '9'
    ;
// A floating constant in decimal or hexadecimal notation.
fragment
FLOATING_CONSTANT
    : DECIMAL_FLOATING_CONSTANT
    | HEXADECIMAL_FLOATING_CONSTANT
    ;
// Decimal floating constant. With a fractional part the exponent is optional;
// with a plain digit sequence the exponent is required.
fragment
DECIMAL_FLOATING_CONSTANT
    : FRACTIONAL_CONSTANT ( EXPONENT_PART )? ( FLOATING_SUFFIX )?
    | DIGIT_SEQUENCE EXPONENT_PART ( FLOATING_SUFFIX )?
    ;
// Digits around a decimal point: the integer part may be omitted when
// fraction digits follow the point, or the fraction digits may be omitted
// when an integer part precedes it.
fragment
FRACTIONAL_CONSTANT
    : ( DIGIT_SEQUENCE )? '.' DIGIT_SEQUENCE
    | DIGIT_SEQUENCE '.'
    ;
// Decimal exponent: e or E, an optional sign, then at least one digit.
// Declared as a fragment: it is only a piece of DECIMAL_FLOATING_CONSTANT.
// As a free-standing token rule it overlapped with IDENTIFIER on inputs
// such as e10.
fragment
EXPONENT_PART : 'e' SIGN ? DIGIT_SEQUENCE
| 'E' SIGN ? DIGIT_SEQUENCE
;
// Sign character used inside exponents.
fragment
SIGN
    : ( '+' | '-' )
    ;
// One or more decimal digits.
fragment
DIGIT_SEQUENCE
    : ( DIGIT )+
    ;
// Hexadecimal floating constant: prefix, a hex mantissa (with or without a
// point), a mandatory binary exponent and an optional suffix.
fragment
HEXADECIMAL_FLOATING_CONSTANT
    : HEXADECIMAL_PREFIX
      ( HEXADECIMAL_FRACTIONAL_CONSTANT
      | HEXADECIMAL_DIGIT_SEQUENCE
      )
      BINARY_EXPONENT_PART
      ( FLOATING_SUFFIX )?
    ;
// Hex digits around a point: the integer part may be omitted when fraction
// digits follow, or the fraction digits may be omitted when an integer part
// precedes the point.
// Declared as a fragment: it is only a piece of HEXADECIMAL_FLOATING_CONSTANT.
// As a free-standing token rule it could claim text such as ab.cd as one token.
fragment
HEXADECIMAL_FRACTIONAL_CONSTANT : HEXADECIMAL_DIGIT_SEQUENCE ? '.' HEXADECIMAL_DIGIT_SEQUENCE
| HEXADECIMAL_DIGIT_SEQUENCE '.'
;
// Binary exponent: p or P, an optional sign, then at least one decimal digit.
// Declared as a fragment: it is only a piece of HEXADECIMAL_FLOATING_CONSTANT.
// As a free-standing token rule it overlapped with IDENTIFIER on inputs
// such as p1.
fragment
BINARY_EXPONENT_PART : 'p' SIGN ? DIGIT_SEQUENCE
| 'P' SIGN ? DIGIT_SEQUENCE
;
// One or more hexadecimal digits.
// Declared as a fragment: it is only a building block of the hexadecimal
// floating-constant rules. As a free-standing token rule it overlapped with
// CONSTANT and IDENTIFIER on inputs such as 12 or abc.
fragment
HEXADECIMAL_DIGIT_SEQUENCE : HEXADECIMAL_DIGIT +
;
// Floating suffix: f or l, in either case.
fragment
FLOATING_SUFFIX
    : ( 'f' | 'F' | 'l' | 'L' )
    ;
//fragment
//ENUMERATION_CONSTANT : IDENTIFIER
// ;
// Character constant: one or more c-chars in single quotes, optionally
// preceded by the wide-character prefix L.
fragment
CHARACTER_CONSTANT
    : ( 'L' )? '\'' C_CHAR_SEQUENCE '\''
    ;
// One or more characters of a character constant.
fragment
C_CHAR_SEQUENCE
    : ( C_CHAR )+
    ;
// One character inside a character constant: an escape sequence, or any
// character other than the single quote, the backslash or a line terminator.
// C99 6.4.4.4 excludes new-line from c-char; excluding it here stops an
// unterminated constant from swallowing the following lines. '\r' is excluded
// as well so that CRLF line endings are treated the same way.
fragment
C_CHAR : ~('\'' | '\\' | '\n' | '\r' )
| ESCAPE_SEQUENCE
;
// Any escape sequence allowed inside character constants and string literals:
// simple, octal, hexadecimal, or a universal character name.
// Declared as a fragment: it is only used from C_CHAR and S_CHAR, and a
// backslash sequence outside a literal must not be emitted as a token.
fragment
ESCAPE_SEQUENCE : SIMPLE_ESCAPE_SEQUENCE
| OCTAL_ESCAPE_SEQUENCE
| HEXADECIMAL_ESCAPE_SEQUENCE
| UNIVERSAL_CHARACTER_NAME
;
// Backslash followed by one of the single-character escape codes.
fragment
SIMPLE_ESCAPE_SEQUENCE
    : '\\'
      ( '\'' | '"' | '?' | '\\'
      | 'a' | 'b' | 'f' | 'n' | 'r' | 't' | 'v'
      )
    ;
// Octal escape: backslash followed by one, two or three octal digits, written
// as three separate alternatives. The three-digit alternative is guarded by a
// syntactic predicate that looks for the full backslash-plus-three-digits form
// before committing to it.
// The trailing comment on the first alternative records a more compact
// formulation using optional digits.
// NOTE(review): presumably the compact form was replaced because of how the
// alternatives were predicted - confirm before simplifying.
fragment
OCTAL_ESCAPE_SEQUENCE : '\\' OCTAL_DIGIT // OCTAL_DIGIT OCTAL_DIGIT ? OCTAL_DIGIT ?
| '\\' OCTAL_DIGIT OCTAL_DIGIT
| ('\\' OCTAL_DIGIT OCTAL_DIGIT OCTAL_DIGIT )=> '\\' OCTAL_DIGIT OCTAL_DIGIT OCTAL_DIGIT
;
// Hexadecimal escape: backslash, x, then one or more hex digits.
fragment
HEXADECIMAL_ESCAPE_SEQUENCE
    : '\\' 'x' ( HEXADECIMAL_DIGIT )+
    ;
// String literal: a possibly empty s-char sequence in double quotes,
// optionally preceded by the wide-string prefix L.
STRING_LITERAL
    : ( 'L' )? '"' ( S_CHAR_SEQUENCE )? '"'
    ;
// One or more characters of a string literal.
fragment
S_CHAR_SEQUENCE
    : ( S_CHAR )+
    ;
// One character inside a string literal: an escape sequence, or any character
// other than the double quote, the backslash or a line terminator.
// C99 6.4.5 excludes new-line from s-char; excluding it here stops an
// unterminated string from swallowing the following lines. '\r' is excluded
// as well so that CRLF line endings are treated the same way.
fragment
S_CHAR : ~('"' | '\\' | '\n' | '\r' )
| ESCAPE_SEQUENCE
;
// Line comment: from // up to and including the line terminator
// (\n, \r or \r\n), if there is one. Sent to the hidden channel.
SINGLELINECOMMENT
    : '//'
      ( ~( '\n' | '\r' ) )*
      ( '\r' ( '\n' )?
      | '\n'
      )?
      { $channel = HIDDEN; }
    ;
// Block comment: everything between the opening and the first closing
// delimiter, matched non-greedily. Sent to the hidden channel.
MULTILINECOMMENT
    : '/*'
      ( options { greedy = false; } : . )*
      '*/'
      { $channel = HIDDEN; }
    ;
// A single whitespace character (space, tab, line feed, carriage return or
// form feed). Sent to the hidden channel.
WS
    : ( ' ' | '\t' | '\n' | '\r' | '\u000C' )
      { $channel = HIDDEN; }
    ;