写了一个针对 XML 文件的词法分析器,程序的分析引擎采用 switch + state(状态机)的方式编写,主要目的是加深对词法分析的理解。
// Main scanning loop of a hand-written XML lexer built as a switch-based state machine.
// Each iteration reads one character from `sr` into `c` and dispatches on `state`.
// `sr`, `c`, `state`, `sb`, `StID`, `tempc`, `tempstr` and `jumpspace()` are declared
// outside this fragment.
// NOTE(review): `sr` looks like a TextReader/StreamReader, `sb` a StringBuilder and
// `StID` a stack of open element names — confirm against the declarations.
// NOTE(review): `jumpspace()` presumably skips whitespace; whether it consumes input
// from `sr` and/or updates `c` cannot be told from here — confirm.
//
// States handled below:
//    0  ordinary content; '<' moves to 10
//   10  character right after '<'
//   11  character right after "<?"
//   15  after an element name or an attribute value: expects '/', '>' or an attribute
//   20  continuation of an element name
//   30  attribute name characters
//   31  expects '=' after an attribute name (only entered through `goto case 31`)
//   32  expects the opening quote of an attribute value
//   36  inside a single-quoted attribute value
//   37  inside a double-quoted attribute value
//   50  name of a closing tag, right after "</"
//   51  expects '>' at the end of a closing tag
while (sr.Peek()!=-1)
{
//int c = sr.Read();
//Char ch = (char)c;
//Console.WriteLine("{0}\t {1}\t {2}",ch,c, Convert.ToString(c,2).PadLeft(16,'0') );
////switch
// Consume the next character; the loop condition guarantees one is available.
c = (char)sr.Read();
switch(state)
{
case 0:// ordinary content (outside any tag)
jumpspace();
// Any character other than '<' is ignored in this state.
if(c=='<')
state = 10;
break;
case 10:// "<" was read; classify the character that follows it
switch(c)
{
case '?': state =11;// "<?"
break;
case ' ':
case '\t':
case '\r':
case '\n':
// Whitespace directly after '<' is rejected.
throw new Exception("该位置上不允许有空白");
// NOTE(review): unreachable after the throw.
break;
case '/':
state = 50;// closing tag: "</" name ">"
break;
default:
state = 20;// element name
sb.Remove(0,sb.Length);// clear the buffer before collecting the element name
sb.Append(c);
// Look ahead with Peek() so that the character terminating the name is not consumed here.
while (sr.Peek() != -1)
{
char nextchar = (char)sr.Peek();
if (char.IsLetter(nextchar))
{
sb.Append(nextchar);// element name character
c = (char)sr.Read();
}
else
{
// element name complete: print it and remember it as an open element
//string temp = sb.ToString();
Console.WriteLine(sb.ToString());
StID.Push(sb.ToString());
sb.Remove(0, sb.Length);
state = 15;// next expected: '/', '>' or whitespace
jumpspace();
// Leaves the inner while loop only; the `break` after the loop leaves the switch section.
break;
}
}
break;
}
break;
case 11:// "<?" was read -- original note: "system" (presumably the XML declaration; confirm)
switch(c)
{
case 'x':
// NOTE(review): no `case 12` appears in this switch as shown.
state =12;
break;
}
// Any character other than 'x' leaves the state unchanged.
break;
case 15:
jumpspace();
if (c == '/' )
{
// Self-closing element: pop and print the element name pushed when it was opened.
Console.WriteLine( StID.Pop());
// done
c =(char)sr.Read();
// The character right after '/' must be '>'.
if (c != '>')
throw new Exception("/>");
// what follows may be a node or text
state = 0;
}
else if (c == '>')
{
state = 0;// back to the initial state
}
else if (char.IsLetterOrDigit(c))
{
//sb.Append(c);// attribute
// attribute starts: re-dispatch the current character to state 30 without reading a new one
state = 30;
goto case 30;
}
else
{
throw new Exception("错误的属性");
}
break;
case 20:// element name, second character onward; element only
// NOTE(review): in the visible code, state 20 is only assigned in case 10's default branch,
// which switches to 15 as soon as a non-letter is peeked — this case may only run if
// `state` is set to 20 elsewhere; confirm.
if (char.IsLetterOrDigit(c))
{
sb.Append(c);// element name character
}
else if (char.IsWhiteSpace(c))// whitespace ends the name; skip the rest of the whitespace run
{
// done: print the name and push it as an open element
//string temp = sb.ToString();
Console.WriteLine(sb.ToString());
StID.Push(sb.ToString());
sb.Remove(0, sb.Length);
while (true)
{
c = (char)sr.Peek();
if (!char.IsWhiteSpace(c))// stop at the first non-whitespace character (left unread)
break;
sr.Read();
}
state = 30;// attribute
}
else if (c == '>')
{
// done: print the name and push it as an open element
//string temp = sb.ToString();
Console.WriteLine(sb.ToString());
StID.Push(sb.ToString());
sb.Remove(0, sb.Length);
state = 15;// intermediate state: the next item may be an element or text
}
else
{
throw new Exception("无效的字符");
}
break;
case 30:// attribute name: first and following characters
if (char.IsLetterOrDigit(c))
{
sb.Append(c);// attribute name character
}
else
{
// Not a name character: let state 31 examine the same character.
goto case 31;
}
break;
case 31:
// Reached only through `goto case 31`; `state` itself is never set to 31 in the visible code.
jumpspace();
if(c=='=')
{
state =32;
}
else
{
throw new Exception("属性无效字符!");
}
break;
case 32:// opening quote of the attribute value
jumpspace();
switch(c)
{
case '\'':
state =36;// single-quoted value
break;
case '\"':
state = 37;// double-quoted value
break;
default:
throw new Exception("应该是引号");
}
break;
case 36:
// Inside a single-quoted value: characters other than '\'' and '>' are skipped without being stored.
switch (c)
{
case '\'':
// NOTE(review): this assignment is overwritten two lines below; it only matters if jumpspace() reads `state`.
state = 30;// reset, next attribute
jumpspace();
state = 15;
break;
case '>':
// '>' before the closing quote is treated as a missing end quote.
throw new Exception("没有结束引号");
// NOTE(review): unreachable after the throw.
break;
}
break;
case 37:
// Inside a double-quoted value: characters other than '"' and '>' are skipped without being stored.
switch (c)
{
case '\"':
// NOTE(review): this assignment is overwritten two lines below; it only matters if jumpspace() reads `state`.
state = 30;// reset, next attribute
jumpspace();
state = 15;
break;
case '>':
// '>' before the closing quote is treated as a missing end quote.
throw new Exception("没有结束引号");
// NOTE(review): unreachable after the throw.
break;
}
break;
case 50:
// Closing tag: `c` is the first character after "</".
sb.Remove(0,sb.Length);
if (char.IsLetterOrDigit(c))
sb.Append(c);
else
throw new Exception("错误的结束字符");
// Collect the rest of the name by peeking, so the terminating character stays unread.
while (sr.Peek() != -1)
{
tempc = (char)sr.Peek();
if (char.IsLetterOrDigit(tempc))
{
sb.Append(tempc);
c = (char)sr.Read();
}
else
{
//Console.WriteLine(StID.Peek
// Keep the expected name for the error message, then pop it and compare with the closing name.
tempstr = StID.Peek();
if(StID.Pop()!=sb.ToString())
throw new Exception (string.Format( "结束标记 '{0}' 与开始标记 '{1}' 不匹配"
,sb.ToString()
,tempstr));
state =51;// handle the tail of the closing tag: optional whitespace, then '>'
break;
}
}
break;
case 51:
jumpspace();
switch (c)
{
case '>':
state = 0;// back to ordinary content
break;
default:
throw new Exception("名称包含无效字符");
}
break;
}