词法分析程序(Lexical Analyzer)要求:
- 从左至右扫描构成源程序的字符流
- 识别出有词法意义的单词(Lexemes)
- 返回单词记录(单词类别,单词本身)
- 滤掉空格
- 跳过注释
- 发现词法错误
程序结构:
输入:字符流(什么输入方式,什么数据结构保存)
处理:
–遍历(什么遍历方式)
–词法规则
输出:单词流(什么输出形式)
–二元组
单词类别:
1.标识符(10)
2.无符号数(11)
3.保留字(一词一码)
4.运算符(一词一码)
5.界符(一词一码)
| 单词符号 | 种别码 | 单词符号 | 种别码 |
|----------|--------|----------|--------|
| begin | 1 | : | 17 |
| if | 2 | := | 18 |
| then | 3 | < | 20 |
| while | 4 | <= | 21 |
| do | 5 | <> | 22 |
| end | 6 | > | 23 |
| l(l\|d)* | 10 | >= | 24 |
| dd* | 11 | = | 25 |
| + | 13 | ; | 26 |
| - | 14 | ( | 27 |
| * | 15 | ) | 28 |
| / | 16 | # | 0 |
#include <iostream> #include <fstream> #include <string> #include <windows.h> using namespace std; #define CODE "E:\code\code.txt" #define RESULT "E:\code\result.txt" //token数组用来接收关键字,变量,运算符和界符 //这里限制变量名的长度最多为9 //prog数组存储的是源代码字符串长度 char *prog, token[10]; char ch; //syn是各个单词符号对应的数字 int syn, p, m = 0, n, line, sum = 0; //rwtab数组存储的是关键字 char *rwtab1[10] = { "begin","if","then","while","do","end" }; char *rwtab2[4] = { "const","var","procedure","call" }; void scaner() { //规定,标识符只能由字母或数字构成 /* 共分为三大块,分别是标示符、数字、符号,对应下面的 if else if 和 else */ //将全部置空 for (n = 0; n<10; n++) //token为已捕获的字符数 token[n] = NULL; ch = prog[p++]; //这样处理,可以去除空格 while (ch == ' ') { ch = prog[p]; p++; } //在这个if判断中,范围是a-z或者A-Z,因为规定变量只能以字母开头 if ((ch >= 'a'&&ch <= 'z') || (ch >= 'A'&&ch <= 'Z')) //可能是标示符或者变量名 { m = 0; //这里,是变量的第一个字符以后,可以是字母,数字 while ((ch >= '0'&&ch <= '9') || (ch >= 'a'&&ch <= 'z') || (ch >= 'A'&&ch <= 'Z')) { token[m++] = ch; ch = prog[p++]; } //变量的字符串结束标志 token[m++] = ' '; p--; syn = 10; //将识别出来的字符和已定义的标示符作比较, 判断是否是关键字,所有关键字都是小写 for (n = 0; n<6; n++) if (strcmp(token, rwtab1[n]) == 0) { syn = n + 1; break; } for (n = 0; n < 4;n++) { if (strcmp(token, rwtab2[n]) == 0) { syn = n + 31; break; } } } else if ((ch >= '0'&&ch <= '9')) //数字 ,如果是数字,就用sum来保存这个数字 { sum = 0; while ((ch >= '0'&&ch <= '9')) { //这里*10是只考虑十进制数 sum = sum * 10 + ch - '0'; ch = prog[p++]; } p--; syn = 11; //可接收的数字的最大值为32767,如果更大,会报错 if (sum>32767) syn = -1; } else switch (ch) //如果是其他的字符 { case '<': m = 0; token[m++] = ch; ch = prog[p++]; if (ch == '>') { //说明是不等号 syn = 21; token[m++] = ch; } else if (ch == '=') { //说明是<= syn = 22; token[m++] = ch; } else { //否则,就只是一个<符号 syn = 23; //此时p回退一个 p--; } break; case '>': m = 0; token[m++] = ch; ch = prog[p++]; if (ch == '=') { syn = 24; token[m++] = ch; } else { syn = 20; p--; } break; case ':': m = 0; token[m++] = ch; ch = prog[p++]; if (ch == '=') { //说明是赋值运算符 syn = 18; token[m++] = ch; } else { //否则就只是个: syn = 17; p--; } break; case '*': syn = 13; token[0] = ch; break; case 
'/': syn = 14; token[0] = ch; break; case '+': syn = 15; token[0] = ch; break; case '-': syn = 16; token[0] = ch; break; case '=': syn = 25; token[0] = ch; break; case ';': syn = 26; token[0] = ch; break; case '(': syn = 27; token[0] = ch; break; case ')': syn = 28; token[0] = ch; break; case ',': syn = 29; token[0] = ch; break; case '!': syn = 30; token[0] = ch; break; case '.': //如果接收到的是.,说明到了源代码的结尾,置syn=0,函数结束 syn = 0; token[0] = ch; break; case ' ': //如果接收到的是换行符,则syn=-2,行+1 syn = -2; break; default: //如果接收到的是其他未定义的字符,置syn=-1,会报错。 syn = -1; break; } } /* 读取源代码文件(.txt) */ void read() { FILE *fp; fp = fopen(CODE, "r"); fseek(fp, 0, SEEK_END); int file_size; file_size = ftell(fp); fseek(fp, 0, SEEK_SET); prog = (char *)malloc(file_size * sizeof(char)); fread(prog, file_size, sizeof(char), fp); //关闭文件流 fclose(fp); } int main() { int p = 0; int line = 1; ofstream outfile(RESULT); cout<< "加载代码文件中......" << endl; Sleep(3000); //读取源代码文件 read(); p = 0; outfile << "词法分析的结果为:" << endl; do { scaner(); switch (syn) { case 11: //cout << "(" << syn << "," << sum << ")" << endl; outfile << "(" << syn << "," << sum << ")" << endl; break; case -1: //cout << "Error in line " << line << "!" << endl; outfile << "Error in line" << line << "!" << endl; break; case -2: line = line++; break; default: //cout << "(" << syn << "," << token << ")" << endl; outfile << "(" << syn << "," << token << ")" << endl; break; } } while (syn != 0); outfile.close(); cout << "词法分析完毕,请在result.txt中查看" << endl; system("pause"); return 0; }
理解了一部分的代码,但是有一些还暂时没有理解到位。
代码出自CSDN博主「judyge」:https://blog.csdn.net/judyge/article/details/52274690