1: package compiler;
2:
3: import java.io.BufferedReader;
4: import java.io.FileNotFoundException;
5: import java.io.FileReader;
6: import java.util.Arrays;
7:
8: public class Scanner {
9:
10: public int lineCnt=0;
11: private char curCh = ' ';
12: private String line;
13: public int lineLength = 0;
14: public int chCount = 0;
15: private int[] ssym;
16: private BufferedReader in;
17:
18: public Scanner(String filePath) {
19: try {
20: in = new BufferedReader(new FileReader(filePath));
21: } catch (FileNotFoundException ex) {
22: ex.printStackTrace();
23: System.out.println("***File not exist!***");
24: }
25: //设置单字符
26: ssym = new int[256];
27: Arrays.fill(ssym, Symbol.nul);
28: ssym['+'] = Symbol.plus;
29: ssym['-'] = Symbol.minus;
30: ssym['*'] = Symbol.mul;
31: ssym['/'] = Symbol.div;
32: ssym['('] = Symbol.lparen;
33: ssym[')'] = Symbol.rparen;
34: ssym['='] = Symbol.eql;
35: ssym[','] = Symbol.comma;
36: ssym['.'] = Symbol.peroid;
37: ssym[';'] = Symbol.semicolon;
38:
39: }
40:
41: //读取一个字符,为减少磁盘I/O次数,每次读取一行
42: void getch() {
43: if (chCount == lineLength) {
44: try {//如果读到行末尾,就重新读入一行
45: String tmp="";
46: while (tmp.equals("")) {
47: tmp=in.readLine().trim()+' '; //除去空行
48: }
49: line=tmp;
50: lineCnt++;
51: } catch (Exception e) {
52: // throw new Error("***program imcomplete!***");
53: e.printStackTrace();
54: System.out.println("***reading character meet with error!***");
55: }
56: lineLength = line.length();
57: chCount = 0;
58: System.out.println(line);
59: }
60: curCh = line.charAt(chCount++);
61: }
62:
63: //词法分析,获取一个词法分析符号,是词法分析器的重点
64: public Symbol getsym() {
65: Symbol sym;
66: while (curCh == ' ') {
67: getch();
68: }
69: if ((curCh >= 'a' && curCh <= 'z')||(curCh >= 'A' && curCh <= 'Z')) {
70: sym = matchKeywordOrIdentifier(); //关键字或者一般标识符
71: } else if (curCh >= '0' && curCh <= '9') {
72: sym = matchNumber(); //数字
73: } else {
74: sym = matchOperator(); //操作符
75: }
76: return sym;
77: }
78:
79: private Symbol matchKeywordOrIdentifier() {
80: StringBuffer sb = new StringBuffer();
81: do{
82: sb.append(curCh);
83: getch();
84: }while((curCh >= 'a' && curCh <= 'z')||(curCh>='A'&&curCh<='Z') || (curCh >= '0' && curCh <= '9'));
85:
86: String token = sb.toString();
87: int index = Arrays.binarySearch(Symbol.word, token); //搜索是不是保留字
88: Symbol sym = null;
89: if (index < 0) {
90: sym = new Symbol(Symbol.ident); //一般标识符
91: sym.id = token;
92: } else {
93: sym = new Symbol(Symbol.wsym[index]); //保留字对应的符号值0-31
94: }
95: return sym;
96: }
97:
98: private Symbol matchNumber() {
99: //统计数字位数
100: Symbol sym = new Symbol(Symbol.number);
101: do {
102: sym.num = 10 * sym.num + curCh - '0'; // 获取数字的值
103: getch();
104: } while (curCh >= '0' && curCh <= '9'); //!!!
105:
106: return sym;
107: }
108:
109: private Symbol matchOperator() {
110: Symbol sym = null;
111: switch (curCh) {
112: case ':': // 赋值符号
113: getch();
114: if (curCh == '=') {
115: sym = new Symbol(Symbol.becomes);
116: getch();
117: } else {
118: sym = new Symbol(Symbol.nul); //不能识别的符号
119: }
120: break;
121: case '<': //小于或者小于等于
122: getch();
123: if (curCh == '=') {
124: sym = new Symbol(Symbol.leq); //是<=
125: getch();
126: } else if (curCh == '>') {
127: sym = new Symbol(Symbol.neq); //是<>
128: getch();
129: } else {
130: sym = new Symbol(Symbol.lss); //是<
131: }
132: break;
133: case '>': //大于或者大于等于
134: getch();
135: if (curCh == '=') {
136: sym = new Symbol(Symbol.geq); //大于等于
137: getch();
138: } else {
139: sym = new Symbol(Symbol.gtr); //大于
140: }
141: break;
142: default:
143: sym = new Symbol(ssym[curCh]);
144: if (sym.symtype != Symbol.peroid) {
145: getch();
146: }
147: }
148: return sym;
149: }
150: }