*
* CLexer shredder = new CLexer(System.in);
* Token t;
* while ((t = shredder.getNextToken()) != null){
* System.out.println(t);
* }
*
*
*
* @see CToken
*/
%%
// Shape of the generated scanner class.
// The generated class is declared public ...
%public
// ... is named CLexer ...
%class CLexer
// ... and implements the Lexer interface (declared elsewhere in the project).
%implements Lexer
// The scanning method is named getNextToken (instead of the JFlex default) ...
%function getNextToken
// ... and its return type is Token.
%type Token
%{
// NOTE(review): neither field is read or written in the visible Java code;
// presumably both are maintained by the lexical rules section -- confirm.
// lastToken: presumably the ID of the most recently returned token -- TODO confirm.
private int lastToken;
// nextState: starts out as the initial lexical state YYINITIAL.
private int nextState=YYINITIAL;
/**
 * Returns the next token, optionally filtering out white space and comments.
 * Tokens are pulled from {@link #getNextToken()} until one is found that the
 * caller asked to see, or until the input is exhausted.
 *
 * @param returnComments if false, tokens reporting isComment() are skipped.
 * @param returnWhiteSpace if false, tokens reporting isWhiteSpace() are skipped.
 * @return the next token that passes the filters, or null when no tokens remain.
 * @throws IOException if the underlying scanner fails to read its input.
 */
public Token getNextToken(boolean returnComments, boolean returnWhiteSpace) throws IOException {
    for (;;) {
        Token candidate = getNextToken();
        if (candidate == null) {
            // End of input: nothing left to filter.
            return null;
        }
        // The white space test comes first, matching the evaluation order callers have always seen.
        if (!returnWhiteSpace && candidate.isWhiteSpace()) {
            continue;
        }
        if (!returnComments && candidate.isComment()) {
            continue;
        }
        return candidate;
    }
}
/**
 * Prints out tokens from a file or System.in.
 * If no arguments are given, System.in will be used for input.
 * If one or more arguments are given, the first argument will be used as
 * the name of the file to use as input.
 * White space tokens are not printed. Any I/O problem is reported by
 * printing the exception message to System.out.
 *
 * @param args program arguments, of which the first is a filename
 */
public static void main(String[] args) {
    InputStream in = null;
    // Only a stream opened by this method is closed afterwards; System.in is left open.
    boolean openedHere = false;
    try {
        if (args.length > 0){
            File f = new File(args[0]);
            if (!f.exists()){
                throw new IOException("Could not find " + args[0]);
            }
            if (!f.canRead()){
                throw new IOException("Could not open " + args[0]);
            }
            in = new FileInputStream(f);
            openedHere = true;
        } else {
            in = System.in;
        }
        CLexer shredder = new CLexer(in);
        Token t;
        while ((t = shredder.getNextToken()) != null) {
            if (t.getID() != CToken.WHITE_SPACE){
                System.out.println(t);
            }
        }
    } catch (IOException e){
        System.out.println(e.getMessage());
    } finally {
        // Release the file handle even when lexing failed part way through.
        if (openedHere){
            try {
                in.close();
            } catch (IOException e){
                System.out.println(e.getMessage());
            }
        }
    }
}
/**
 * Switches the scanner to a new input and sets the position reported for the
 * first token read from it.
 * The switch itself is delegated to the generated yyreset(reader); per the
 * original documentation of this method, that discards the old input and its
 * buffered content and returns the lexer to its initial lexical state.
 * The position fields are assigned afterwards, so the values given here are
 * the ones in effect when scanning resumes.
 * NOTE(review): no %column directive is declared in this specification, so
 * yycolumn may not be advanced by the scanner after this call -- confirm.
 *
 * @param reader The new input.
 * @param yyline The line number of the first token.
 * @param yychar The position (relative to the start of the stream) of the first token.
 * @param yycolumn The position (relative to the line) of the first token.
 * @throws IOException if an IOException occurs while switching readers.
 */
public void reset(java.io.Reader reader, int yyline, int yychar, int yycolumn) throws IOException{
    yyreset(reader);
    // The parameters shadow the scanner fields of the same name, hence the explicit "this.".
    this.yycolumn = yycolumn;
    this.yychar = yychar;
    this.yyline = yyline;
}
%}
// Turn on line counting (yyline) and character counting (yychar);
// reset() above assigns both of these fields.
%line
%char
// JLex-compatible switch selecting the full 8-bit input character set.
%full
// Additional lexical states besides YYINITIAL. Their use is in the rules
// section; presumably MIDDLE_OF_LINE is entered once a line has content and
// PREPROCESSOR while inside a directive -- TODO confirm against the rules.
%state MIDDLE_OF_LINE
%state PREPROCESSOR
// Punctuators that have a trigraph spelling. Each macro matches either the
// plain character or its three-character "??x" trigraph equivalent.
HASH=("#"|"??=")
LBRACKET=("["|"??(")
RBRACKET=("]"|"??)")
BACKSLASH=([\\]|"??/")
CARET=("^"|"??'")
LBRACE=("{"|"??<")
RBRACE=("}"|"??>")
VERTICAL=("|"|"??!")
TILDE=("~"|"??-")
// Basic building blocks: literals, digit classes, letters and white space.
BooleanLiteral=("true"|"false")
HexDigit=([0-9a-fA-F])
Digit=([0-9])
OctalDigit=([0-7])
// Digits 0-3: used as the leading digit of a three-digit octal escape (OctEscape3).
TetraDigit=([0-3])
NonZeroDigit=([1-9])
// Letters and underscore: the characters that may start an Identifier.
Letter=([a-zA-Z_])
BLANK=([ ])
TAB=([\t])
FF=([\f])
// Escape introducer: a backslash or its trigraph spelling.
EscChar=({BACKSLASH})
CR=([\r])
LF=([\n])
// A line terminator: CR, LF, or the CR LF pair.
EOL=({CR}|{LF}|{CR}{LF})
WhiteSpace=({BLANK}|{TAB}|{FF}|{EOL})
// White space that does not end the line.
NonBreakingWhiteSpace=({BLANK}|{TAB}|{FF})
// Any single character that is not white space, punctuation, an operator
// character or a quote; in addition HASH and BACKSLASH (including their
// trigraph spellings, which start with the otherwise excluded '?').
AnyNonSeparator=([^\t\f\r\n\ \(\)\{\}\[\]\;\,\.\=\>\<\!\~\?\:\+\-\*\/\&\|\^\%\"\']|{HASH}|{BACKSLASH})
// Escape sequences usable inside character and string literals.
// Octal escapes of one, two or three digits; the three-digit form must start
// with 0-3 (TetraDigit).
OctEscape1=({EscChar}{OctalDigit})
OctEscape2=({EscChar}{OctalDigit}{OctalDigit})
OctEscape3=({EscChar}{TetraDigit}{OctalDigit}{OctalDigit})
OctEscape=({OctEscape1}|{OctEscape2}|{OctEscape3})
// Hex escape: escape introducer, 'x' or 'X', then exactly two hex digits.
// The class is [xX]; a '|' inside a character class is a literal character,
// so it must not be used as an alternation separator here.
HexEscape=({EscChar}[xX]{HexDigit}{HexDigit})
// Simple escapes: a, b, f, n, r, t, v, quote characters, '?', backslash and 0.
Escape=({EscChar}([a]|[b]|[f]|[n]|[r]|[t]|[v]|[\']|[\"]|[\?]|{BACKSLASH}|[0]))
// An identifier: a Letter followed by any number of letters, digits or '$'.
Identifier=({Letter}({Letter}|{Digit}|"$")*)
// One or more non-separator characters; presumably the fallback for text
// that is not a valid identifier -- confirm against the rules section.
ErrorIdentifier=({AnyNonSeparator}+)
// Comment patterns.
// End-of-line comment: two slashes and everything up to (not including) the line terminator.
Comment=("//"[^\r\n]*)
// Openers for traditional (slash-star) and documentation (slash-star-star) comments.
TradCommentBegin=("/*")
DocCommentBegin =("/**")
// Stars that do NOT terminate the comment: optional non-star/non-slash text,
// one or more stars, then a character that is neither a star nor a slash.
NonTermStars=([^\*\/]*[\*]+[^\*\/])
// Stars that DO terminate the comment: one or more stars followed by a slash.
TermStars=([\*]+[\/])
// Comment interior: any repetition of star-free text ending in a slash, or NonTermStars.
CommentText=((([^\*]*[\/])|{NonTermStars})*)
// Tail of a comment: star-free text followed by the terminating stars and slash.
CommentEnd=([^\*]*{TermStars})
// A complete, closed traditional comment.
TradComment=({TradCommentBegin}{CommentText}{CommentEnd})
// Possible continuations after the documentation comment opener.
DocCommentEnd1=([^\/\*]{CommentText}{CommentEnd})
DocCommentEnd2=({NonTermStars}{CommentText}{CommentEnd})
// A complete documentation comment; the last two alternatives cover the
// opener being followed directly by terminating stars or by a single slash.
DocComment=({DocCommentBegin}({DocCommentEnd1}|{DocCommentEnd2}|{TermStars}|[\/]))
// A traditional comment opener with comment text but no terminator.
OpenComment=({TradCommentBegin}{CommentText}([^\*]*)([\*]*))
// Numeric literal patterns.
// Integer suffix: l/L optionally followed by u/U, or u/U optionally followed by l/L.
LongSuffix=(([lL][uU]?)|([uU][lL]?))
// Decimal integer: a lone 0, or a non-zero digit followed by digits; optional suffix.
DecimalNum=(([0]|{NonZeroDigit}{Digit}*){LongSuffix}?)
// Octal integer: 0 followed by zero or more octal digits; optional suffix.
// Note that a lone "0" matches both DecimalNum and OctalNum.
OctalNum=([0]{OctalDigit}*{LongSuffix}?)
// Hex integer: 0x or 0X followed by at least one hex digit; optional suffix.
HexNum=([0]([x]|[X]){HexDigit}{HexDigit}*{LongSuffix}?)
Sign=([\+\-])
SignedInt=({Sign}?{Digit}+)
Expo=([eE])
// Exponent: e or E followed by an optionally signed run of digits.
ExponentPart=({Expo}{SignedInt})
FloatSuffix=([fFlL])
// Digits with a decimal point and at least one digit on one side of it.
FloatWDecimal=(({Digit}*[\.]{Digit}+)|({Digit}+[\.]{Digit}*))
// Float1: has a decimal point, exponent optional. Float2: no point, exponent required.
Float1=({FloatWDecimal}{ExponentPart}?)
Float2=({Digit}+{ExponentPart})
Float=(({Float1}|{Float2}){FloatSuffix}?)
// A digit followed by any run of non-separator characters or dots;
// presumably the fallback for malformed numbers -- confirm against the rules section.
ErrorFloat=({Digit}({AnyNonSeparator}|[\.])*)
// Character and string literal patterns.
// Any character allowed unescaped in a character literal: not a single quote,
// line terminator character or backslash.
AnyChrChr=([^\'\n\r\\])
// Any trigraph-capable punctuator other than BACKSLASH (plain or trigraph spelling).
TrigraphChar = ({HASH}|{LBRACKET}|{RBRACKET}|{CARET}|{LBRACE}|{RBRACE}|{VERTICAL}|{TILDE})
// Opening quote followed by exactly one escape, trigraph punctuator or plain character.
UnclosedCharacter=([\']({Escape}|{OctEscape}|{HexEscape}|{TrigraphChar}|{AnyChrChr}))
// A well-formed character literal: the above plus the closing quote.
Character=({UnclosedCharacter}[\'])
// Opening quote followed by any number of plain characters or
// escape-introducer-plus-any-character pairs, staying on one line.
MalformedUnclosedCharacter=([\']({AnyChrChr}|({EscChar}[^\n\r]))*)
// Either two adjacent quotes or a MalformedUnclosedCharacter with a closing quote.
MalformedCharacter=([\'][\']|{MalformedUnclosedCharacter}[\'])
// Any character allowed unescaped in a string: not a double quote, line
// terminator character, backslash or question mark (question marks are
// handled separately because of trigraphs).
AnyStrChr=([^\"\n\r\\\?])
// Backslash (or its trigraph) immediately followed by a line terminator.
SlashNewLine=({BACKSLASH}{EOL})
// Question marks that do not form a trigraph: one or more '?' followed by a
// character that cannot complete a trigraph (and is not a backslash, '?',
// double quote or line terminator character), or a single '?' followed by
// one of the trigraph third characters.
FalseTrigraph= (("?"(("?")*)[^\=\(\)\/\'\<\>\!\-\\\?\"\n\r])|("?"[\=\(\)\/\'\<\>\!\-]))
// Opening double quote, then any mix of escapes or trigraph punctuators
// (optionally preceded by extra '?'), false trigraphs, plain characters and
// line continuations, then optional trailing '?' characters.
UnclosedString=([\"]((((("?")*)({Escape}|{OctEscape}|{HexEscape}|{TrigraphChar}))|{FalseTrigraph}|{AnyStrChr}|{SlashNewLine})*)(("?")*))
// A well-formed string literal: the above plus the closing double quote.
String=({UnclosedString}[\"])
// Opening double quote followed by anything up to a double quote or end of line.
MalformedUnclosedString=([\"]([^\"\n\r])*)
MalformedString=({MalformedUnclosedString}[\"])
// Preprocessor directive patterns.
// Directive names recognised after the hash.
PreProcessorKeyWord=("include"|"include_next"|"define"|"undef"|"if"|"ifdef"|"ifndef"|"else"|"elif"|"endif"|"line"|"pragma"|"error")
// An escape introducer followed by a line terminator (line continuation), or a lone escape introducer.
PreProcessorEscapes=({EscChar}{EOL}|{EscChar})
// Directive body: anything on the line, where a slash is only accepted when
// it is not followed by another slash or a star (i.e. does not start a comment).
PreProcessorText=(([^\n\r\/]|{PreProcessorEscapes}|[\/][^\/\*\n\r]|[\/]{PreProcessorEscapes})*)
// Hash, optional non-breaking white space, a known keyword, then the directive body.
PreProcessorDirective=({HASH}({NonBreakingWhiteSpace}*){PreProcessorKeyWord}{PreProcessorText})
// Hash, optional non-breaking white space, then a run of characters that are
// not a slash, line terminator character, blank, tab, form feed or backslash.
MalformedPreProcessorDirective=({HASH}({NonBreakingWhiteSpace}*)([^\/\n\r\ \t\f\\]*))
%%