• [C++] 简易词法分析器


      花了 1.5 小时写出来的实验,没有用自动机,因为觉得很难画,不如直接模拟。等有时间会画一个自动机出来,并根据自动机写出一个更像样的词法分析器。

      定义假设单词种别按如下形式编码:

      保留字:单词种别码均为1。输出样式示例:(1if)。

      标识符:单词种别码均为2。输出样式示例:(2x)。

      无符号整数:单词种别码均为3。输出样式示例:(310)。

      运算符:单词种别码均为4。输出样式示例:(4+)。

      分隔符:单词种别码均为5。输出样式示例:(5, ;)。

      这个词法分析器可以过滤“//”和“/* */”两种形式的注释。

      非常简易,如果各位发现了问题,请及时告知我。非常感谢!

      1 #include <bits/stdc++.h>
      2 using namespace std;
      3 
      // Path of the source file that main() hands to GetSourceFile().
      const char* SAVEPATH = "./in";

      // Reserved words; analyse() reports a match against this table as class 1.
      const char* KEYWORD[] = {
          "asm", "do", "if", "return", "typedef",
          "auto", "double", "inline", "short", "typeid",
          "bool", "dynamic_cast", "int", "signed", "typename",
          "break", "else", "long", "sizeof", "union",
          "case", "enum", "mutable", "static", "unsigned",
          "catch", "explicit", "namespace", "static_cast", "using",
          "char", "export", "new", "struct", "virtual",
          "class", "extern", "operator", "switch", "void",
          "const", "false", "private", "template", "volatile",
          "const_cast", "float", "protected", "this", "wchar_t",
          "continue", "for", "public", "throw", "while",
          "default", "friend", "register", "true", "delete",
          "goto", "reinterpret_cast", "try"
      };

      // Operators; analyse() reports a match against this table as class 4.
      const char* OPERATOR[] = {
          "~", "->", "!", "++", "--", "'", "+", "-", "*", "/",
          "=", "<", "<=", ">", ">=", "!=", ">>", "<<", "==", "%",
          "&", "^", "|", "?", "&&", "||"
      };

      // Separators, once as characters (used by isSplit) and once as strings
      // (analyse() reports a match against SSPLIT as class 5).
      const char SPLIT[] = {'(', ')', '{', '}', ';', ',', ':'};
      const char* SSPLIT[] = {"(", ")", "{", "}", ";", ",", ":"};

      // Entry counts are computed from the tables themselves so they can never
      // drift from the initializer lists (a hand-written keyword count smaller
      // than the table makes the trailing keywords unreachable).
      const int KEYWORDSIZE = static_cast<int>(sizeof(KEYWORD) / sizeof(KEYWORD[0]));
      const int OPERATORSIZE = static_cast<int>(sizeof(OPERATOR) / sizeof(OPERATOR[0]));
      const int SPLITSIZE = static_cast<int>(sizeof(SPLIT) / sizeof(SPLIT[0]));
     13 
     14 
     // Returns true when ch is an ASCII letter, lower or upper case.
     inline bool isAlpha(char ch) {
         const bool lowerCase = ('a' <= ch) && (ch <= 'z');
         const bool upperCase = ('A' <= ch) && (ch <= 'Z');
         return lowerCase || upperCase;
     }
     18 
     19 
     // Returns true when ch is one of the ASCII decimal digits '0'..'9'.
     inline bool isDigit(char ch) {
         if (ch < '0') return false;
         if (ch > '9') return false;
         return true;
     }
     23 
     24 
     25 inline bool isSplit(char ch) {
     26     for(int i = 0; i < 6; i++) {
     27         if(ch == SPLIT[i]) return true;
     28     }
     29     return false;
     30 }
     31 
     32 
     // Returns true when ch can be part of an operator token.
     //
     // The set covers every character used by the entries of the OPERATOR table
     // except the lone "'" entry. Without '%', '&', '|', '^', '~' and '?' the
     // tokenizer would silently discard those characters, so operators such as
     // "%", "&&" or "||" could never be produced.
     inline bool isSign(char ch) {
         switch (ch) {
             case '+': case '-': case '*': case '/':
             case '=': case '<': case '>': case '!':
             case '%': case '&': case '|': case '^':
             case '~': case '?':
                 return true;
             default:
                 return false;
         }
     }
     39 
     40 
     // Reads the text file at `path` line by line into `saveVec`.
     //
     // path    - file to read.
     // saveVec - cleared first, then filled with one entry per line (without the
     //           line terminator).
     //
     // If the file cannot be opened, `saveVec` is simply left empty. The read
     // itself is the loop condition: testing eof() before reading never becomes
     // true on a stream that failed to open (the loop would spin forever) and
     // appends one spurious empty line after the last real one.
     void GetSourceFile(const char* path, std::vector<std::string>& saveVec) {
         saveVec.clear();
         std::ifstream is(path);
         std::string buffer;
         while (std::getline(is, buffer)) {
             saveVec.push_back(buffer);
         }
     }
     50 
     51 
     52 void _PreProcess(string line, vector<string>& result) {
     53     int lidx = 0;
     54     while(lidx < line.length()) {
     55         if(line[lidx] == ' ' || line[lidx] == '    ') {
     56             lidx++;
     57             continue;
     58         }
     59         if(isDigit(line[lidx]) || isAlpha(line[lidx])) {
     60             int ridx = lidx;
     61             while((isDigit(line[ridx]) || isAlpha(line[ridx])) &&
     62                         !isSplit(line[ridx]) && ridx < line.length() && line[ridx] != ' ' && line[ridx] != '    ') {
     63                 ridx++;
     64             }
     65             result.push_back(line.substr(lidx, ridx-lidx));
     66             lidx = ridx;
     67         }
     68         else if(isSign(line[lidx])) {
     69             int ridx = lidx;
     70             while(isSign(line[ridx]) && line[ridx] != ' ' && line[ridx] != '    ' && 
     71                         !isSplit(line[ridx]) && ridx < line.length()) {
     72                 ridx++;
     73             }
     74             result.push_back(line.substr(lidx, ridx-lidx));
     75             lidx = ridx;
     76         }
     77         else if(isSplit(line[lidx])) {
     78             result.push_back(line.substr(lidx, 1));
     79             lidx++;
     80         }
     81         else lidx++;
     82     }
     83 }
     84 
     85 
     // Strips comments from one source line in place.
     //
     // line  - in: raw line; out: the line with comment text removed.
     // isAnt - in: true when the line starts inside an unterminated block comment
     //         carried over from earlier lines; out: true when a block comment is
     //         still open at the end of this line.
     //
     // The whole line is scanned in a single pass, so several comments on one
     // line are handled, a block comment opened and closed on the same line does
     // not swallow the text after it, and lines of any length (including empty
     // and one-character lines) inside a block comment are blanked. Each closed
     // block comment is replaced by one space so the tokens on either side of it
     // do not fuse. Comment markers inside string literals are NOT recognised.
     void RemoveAnnotation(std::string& line, bool& isAnt) {
         const std::size_t len = line.size();
         std::string kept;
         kept.reserve(len);

         std::size_t pos = 0;
         while (pos < len) {
             const bool hasNext = pos + 1 < len;
             if (isAnt) {
                 // Inside a block comment: discard everything up to "*/".
                 if (hasNext && line[pos] == '*' && line[pos + 1] == '/') {
                     isAnt = false;
                     kept += ' ';
                     pos += 2;
                 } else {
                     ++pos;
                 }
             } else if (hasNext && line[pos] == '/' && line[pos + 1] == '/') {
                 // Line comment: the rest of the line is ignored.
                 break;
             } else if (hasNext && line[pos] == '/' && line[pos + 1] == '*') {
                 // Skip both opener characters so "/*/" is not read as closed.
                 isAnt = true;
                 pos += 2;
             } else {
                 kept += line[pos];
                 ++pos;
             }
         }
         line.swap(kept);
     }
    112 
    113 
    114 vector<string> PreProcess(vector<string>& saveVec) {
    115     vector<string> result;
    116     bool isAnt = false;
    117     for(auto& line : saveVec) {
    118         RemoveAnnotation(line, isAnt);
    119     }
    120     for(auto line : saveVec) _PreProcess(line, result);
    121     return result;
    122 }
    123 
    124 
    125 pair<int, string> analyse(string line) {
    126     typedef pair<int, string> PIS;
    127     // 保留字
    128     for(int i = 0; i < KEYWORDSIZE; i++) if(strcmp(line.c_str(), KEYWORD[i]) == 0) return PIS(1, line);
    129     // 运算符
    130     for(int i = 0; i < OPERATORSIZE; i++) if(strcmp(line.c_str(), OPERATOR[i]) == 0) return PIS(4, line);
    131     // 分隔符
    132     for(int i = 0; i < SPLITSIZE; i++) if(strcmp(line.c_str(), SSPLIT[i]) == 0) return PIS(5, line);
    133     bool digitFlag = false, alphaFlag = false;
    134     for(auto ch : line) {
    135         if(isDigit(ch)) digitFlag = true;
    136         if(isAlpha(ch)) alphaFlag = true;
    137     }
    138     // 无符号整数
    139     if(digitFlag && !alphaFlag) return PIS(3, line);
    140     return PIS(2, line);
    141 }
    142 
    143 
    144 vector<pair<int, string>> Analyse(vector<string> saveVec) {
    145     int lidx = 0;
    146     vector<pair<int, string>> result;
    147     for(auto line : saveVec) {
    148         result.push_back(analyse(line));
    149     }
    150     return result;
    151 }
    152 
    153 
    154 int main() {
    155     vector<string> saveVec;
    156     vector<string> processResult;
    157     vector<pair<int, string>> analyseResult;
    158 
    159     GetSourceFile(SAVEPATH, saveVec);
    160 
    161     processResult = PreProcess(saveVec);
    162 
    163     analyseResult = Analyse(processResult);
    164 
    165     for(int i = 0; i < analyseResult.size(); i++) {
    166         cout << analyseResult[i].first << " " << analyseResult[i].second << endl;
    167     }
    168 }

    可以试一下输入下面的内容:

     1  /*bool checkIfSatisfied(vector<pair<int, string>>& analyseResult) {
     2  // 存在不合法则返回0
     3  for(int i = 0; i < analyseResult.size(); i++) {
     4   if(analyseResult[i].first == -1) return false;
     5  }
     6  return true;
     7 }*/ sss11;
     8 
     9 
    10 int main() {
    11  vector<string> saveVec;
    12  vector<string> processResult;
    13  vector<pair<int, string>> analyseResult;
    14 
    15  GetSourceFile(SAVEPATH, saveVec);
    16 
    17  processResult = PreProcess(saveVec);
    18 
    19  analyseResult = Analyse(processResult);
    20 
    21  // if(checkIfSatisfied(analyseResult)) { 
    22   for(int i = 0; i < analyseResult.size(); i++) {
    23    cout << analyseResult[i].first << " " << analyseResult[i].second << endl;
    24   }
    25  // }
    26  // else {
    27   // cout << "Syntex Error !" << endl;
    28  // }
    29 }

    输出的是:

    2 sss11
    5 ;
    1 int
    2 main
    5 (
    5 )
    5 {
    2 vector
    4 <
    2 string
    4 >
    2 saveVec
    5 ;
    2 vector
    4 <
    2 string
    4 >
    2 processResult
    5 ;
    2 vector
    4 <
    2 pair
    4 <
    1 int
    5 ,
    2 string
    4 >>
    2 analyseResult
    5 ;
    2 GetSourceFile
    5 (
    2 SAVEPATH
    5 ,
    2 saveVec
    5 )
    5 ;
    2 processResult
    4 =
    2 PreProcess
    5 (
    2 saveVec
    5 )
    5 ;
    2 analyseResult
    4 =
    2 Analyse
    5 (
    2 processResult
    5 )
    5 ;
    2 for
    5 (
    1 int
    2 i
    4 =
    3 0
    5 ;
    2 i
    4 <
    2 analyseResult
    2 size
    5 (
    5 )
    5 ;
    2 i
    4 ++
    5 )
    5 {
    2 cout
    4 <<
    2 analyseResult
    2 i
    2 first
    4 <<
    4 <<
    2 analyseResult
    2 i
    2 second
    4 <<
    2 endl
    5 ;
    5 }
    5 }
  • 相关阅读:
    Django 之Redis配置
    python之类中如何判断是函数还是方法
    Anaconda 虚拟环境安装及应用
    【转载】IDEA:XML配置提示URI is not registered
    idea中配置xml不自动提示解决方案
    Java接口成员变量和方法默认修饰符
    [转载]java中Date,SimpleDateFormat
    intellij idea 的全局搜索快捷键方法
    【转载】使用IntelliJ IDEA提示找不到struts-default文件
    【转载】Jmeter分布式部署测试-----远程连接多台电脑做压力性能测试
  • 原文地址:https://www.cnblogs.com/kirai/p/6762922.html
Copyright © 2020-2023  润新知