package com.csray; import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; public class LexicalAnalysis { static String id; public static void main(String[] args) throws IOException{ LexicalAnalysis la = new LexicalAnalysis(); String context = //la.BufferedReaderDemo //("C:"+System.getProperty("file.separator")+"Users"+System.getProperty("file.separator")+"Administrator"+System.getProperty("file.separator")+"Desktop"+System.getProperty("file.separator")+"Lexical"); la.BufferedReaderDemo("C:\Users\Administrator\Desktop\Lexical\lexical.c"); System.out.println(context); for(int i = 0; i < context.length();){ char nowc = context.charAt(i); //System.out.println(nowc); if(nowc == ' ' || nowc == ' ' || nowc == ' '){ id = "illegalCharacter"; ++i; }else if(isAlpha(nowc)){ i = alphaProcess(context, nowc, i); } else if(isDigit(nowc)){ i = digitProcess(context, nowc, i); //i++; } else { i = otherProcess(context, nowc, i); //i++; } } } //read the lexical.c file to String public String BufferedReaderDemo(String path) throws IOException{ File file = new File(path); if(!file.exists() || file.isDirectory()) throw new FileNotFoundException(); BufferedReader br = new BufferedReader(new FileReader(file)); String tmp = null; StringBuffer sbuff = new StringBuffer(); tmp = br.readLine(); while(tmp != null){ sbuff.append(tmp+" "); tmp = br.readLine(); } return sbuff.toString(); } //alphaProcess public static int alphaProcess(String context, char c, int i){ StringBuffer word = new StringBuffer(); while((isAlpha(c)) || (isDigit(c)) || c == '_'){ word.append(c); c = context.charAt(++i); } //System.out.println(word.toString()); //check this word if or not a keyword if(checkKeyword(word.toString())){ id = "isKeyword"; System.out.println("( "+ word.toString() + ", " + id +" )"); } else { id = "isCommonWord"; System.out.println("( "+ word.toString() + ", " + id +" )"); } return i; } // checkKeyword public static boolean checkKeyword(String word){ String keyword = "auto double int struct break else long switch case enum register typedef char " + "extern return union const float short unsigned continue for signed void default goto " + "sizeof volatile do if while static scnaf printf"; String[] keyWords = keyword.split(" "); for(int i = 0; i < keyWords.length; ++i){ if(word.equals(keyWords[i])){ return true; } } return false; } // digitProcess public static int digitProcess(String context, char c, int i){ StringBuffer digit = new StringBuffer(); while(isDigit(c)){ digit.append(c); c = context.charAt(++i); } id = "isDigit"; System.out.println("( "+ digit.toString() + ", " + id + " )"); return i; } // otherProcess public static int otherProcess(String context, char c, int i){ //+ / ( ) { } += /= & " " StringBuffer operator = new StringBuffer(); StringBuffer delimiter = new StringBuffer(); if(c == '+'){ operator.append(c); id = "isOperator"; c = context.charAt(++i); if(c == '='){ operator.append(c); System.out.println("( "+ operator + ", " + id + " )"); return ++i; }else if(c == '+'){ operator.append(c); System.out.println("( "+ operator + ", " + id + " )"); return ++i; }else { System.out.println("( "+ operator + ", " + id + " )"); return --i; } } else if(c == '('){ delimiter.append(c); id = "isDelimiter"; System.out.println("(" + delimiter + ", " + id + " )"); return ++i; } else if(c == ')'){ delimiter.append(c); id = "isDelimiter"; System.out.println("(" + delimiter + ", " + id + " )"); return ++i; }else if(c == '{'){ delimiter.append(c); id = "isDelimiter"; System.out.println("(" + delimiter + ", " + id + " )"); return ++i; } else if(c == '}'){ delimiter.append(c); id = "isDelimiter"; System.out.println("(" + delimiter + ", " + id + " )"); return ++i; }else if(c == ';'){ delimiter.append(c); id = "isDelimiter"; System.out.println("(" + delimiter + ", " + id + " )"); return ++i; }else if(c == '='){ operator.append(c); id = "isOperator"; c = context.charAt(++i); if(c == '='){ operator.append(c); System.out.println("( "+ operator + ", " + id + " )"); return ++i; }else { System.out.println("( "+ operator + ", " + id + " )"); return i; } } else if(c == '/'){ operator.append(c); c = context.charAt(++i); if(c == '='){ operator.append(c); id = "isOperator"; System.out.println("( "+ operator + ", " + id + " )"); return ++i; }else if(c == '/'){ operator.append(c); id = "isAnnotation"; c = context.charAt(++i); while(c != ' '){ operator.append(c); c = context.charAt(++i); } System.out.println("( "+ operator + ", " + id + " )"); return i; } }else if(c == '&'){ operator.append(c); id = "isOperator"; System.out.println("(" + operator + ", " + id + " )"); return ++i; }else if(c == '"'){ delimiter.append(c); id = "isDelimiter"; System.out.println("(" + delimiter + ", " + id + " )"); return ++i; }else if(c == ','){ delimiter.append(c); id = "isDelimiter"; System.out.println("(" + delimiter + ", " + id + " )"); return ++i; }else if(c == '%'){ operator.append(c); id = "isOperator"; c = context.charAt(++i); if(c == 'd'){ operator.append(c); System.out.println("(" + operator + ", " + id + " )"); return ++i; } return --i; } return i; } //isDigit public static boolean isDigit(char c){ if(c >= '0' && c <= '9') return true; return false; } //isAlpha public static boolean isAlpha(char c){ if(c >= 'A' && c <= 'Z') return true; else if(c >= 'a' && c <= 'z') return true; return false; } }
实例:
//test int main() { float nu0_m; int num = 100; num++; scanf("%d", &num); printf("%d", num); return 0; }
结果:
//test int main() { float nu0_m; int num = 100; num++; scanf("%d", &num); printf("%d", num); return 0; } ( //test, isAnnotation ) ( int, isKeyword ) ( main, isCommonWord ) ((, isDelimiter ) (), isDelimiter ) ({, isDelimiter ) ( float, isKeyword ) ( nu0_m, isCommonWord ) (;, isDelimiter ) ( int, isKeyword ) ( num, isCommonWord ) ( =, isOperator ) ( 100, isDigit ) (;, isDelimiter ) ( num, isCommonWord ) ( ++, isOperator ) (;, isDelimiter ) ( scanf, isCommonWord ) ((, isDelimiter ) (", isDelimiter ) (%d, isOperator ) (", isDelimiter ) (,, isDelimiter ) (&, isOperator ) ( num, isCommonWord ) (), isDelimiter ) (;, isDelimiter ) ( printf, isKeyword ) ((, isDelimiter ) (", isDelimiter ) (%d, isOperator ) (", isDelimiter ) (,, isDelimiter ) ( num, isCommonWord ) (), isDelimiter ) (;, isDelimiter ) ( return, isKeyword ) ( 0, isDigit ) (;, isDelimiter ) (}, isDelimiter )