这学期的编译原理课语法分析已经快讲完了。整个编译器的前端主要包括词法分析和语法分析两个部分。现在我开始着手用 C 语言编写编译器的前端。以下是程序的说明。
1)该程序是词法分析(lexical analysis)部分。
2)只支持无符号整数类型,不支持数组和指针等高级特性。
3)不支持函数。注释采用 C++ 风格的“//”。
4)使用 VC++ .NET 编译。
//////////////////////////////lexical.h////////////////////////////////////
#ifndef _lexical_h_
#define _lexical_h_
// State enumeration for the lexer's state machine; the same values are also
// used as token types (see the `type` fields of tokendes and lextoken).
// `start` and `unknown` are both explicitly 0, so they are the same value;
// the remaining enumerators count up from 1 in declaration order.
// NOTE(review): `if` and `else` are reserved C keywords and cannot be used as
// enumerator names, so this declaration does not compile as written. The
// enumerators were presumably upper-case originally (IF, ELSE, ...) - confirm
// and rename together with every user of these names.
typedef enum {start=0,unknown=0,comment,number,identi,if,else,end,repeat,until,assign,equal,plus,minus,multi,divide,less,greater,lparen,rparen,semi,expression} lextokentype;
// One state-transition rule of the lexer's state machine.
//   beg       - state the machine must currently be in for the rule to apply
//   domainbeg - first character of the accepted range (inclusive)
//   domainend - last character of the accepted range (inclusive)
//   end       - state the machine moves to when the rule applies
typedef struct {lextokentype beg; char domainbeg; char domainend; lextokentype end;} transstate;
// Keyword (reserved word) descriptor.
//   strexp - spelling of the keyword; the buffer holds at most 9 characters
//            plus the terminating '\0'
//   type   - token type associated with that spelling
typedef struct {char strexp[10]; lextokentype type;} tokendes;
// Token record handed back by getnexttoken().
//   strname - pointer to the token's text
//             NOTE(review): presumably the lexeme; who allocates and frees it
//             is not visible here - confirm against getnexttoken()
//   type    - token type
typedef struct {char *strname; lextokentype type;} lextoken;
// Scans the next token from the source text `psource`.
// The definition keeps its scan position and line counter in function-local
// static variables, so each call continues where the previous one stopped
// (and the function is therefore not reentrant).
// Returns a pointer to a lextoken obtained from malloc().
// NOTE(review): the caller presumably owns and must free the result, and the
// end-of-input behaviour should be confirmed against the full definition.
lextoken* getnexttoken(char *psource);
#endif//_lexical_h_
//////////////////////////////lexical.c////////////////////////////////////
#include <stdio.h>
#include <malloc.h>
#include <string.h>
#include "lexical.h"
// Transition table of the lexer's finite state machine.
// Each row is {state, first char, last char, next state}: when the machine is
// in `state` and the current character lies in the inclusive range
// [first char, last char], it moves to `next state`. getnexttoken() scans the
// rows from top to bottom and takes the first one that matches.
transstate trans[] ={
// Inside a number: further digits extend it.
{number,'0','9',number},
// Inside an identifier: letters (either case) and digits extend it.
{identi,'a','z',identi},
{identi,'A','Z',identi},// was an unreachable duplicate of the 'a'-'z' row
{identi,'0','9',identi},
// At the start of a token: white space is skipped by staying in `start`.
{start,' ',' ',start},
{start,'\t','\t',start},
{start,'\n','\n',start},
{start,'\r','\r',start},
// A digit starts a number, a letter (either case) starts an identifier.
{start,'0','9',number},
{start,'a','z',identi},
{start,'A','Z',identi},// was an unreachable duplicate of the 'a'-'z' row
// Single-character operators and punctuation.
{start,'+','+',plus},
{start,'-','-',minus},
{start,'*','*',multi},
{start,'/','/',divide},
{start,'<','<',less},
{start,'>','>',greater},
{start,'(','(',lparen},
{start,')',')',rparen},
{start,';',';',semi},
{start,'=','=',assign},
// A second '/' directly after '/' opens a "//" comment.
{divide,'/','/',comment},
// A comment ends at the newline; every other character except '\0' keeps the
// machine in `comment`. '\0' has no row here, so it falls through to the
// caller's "no transition found" handling.
{comment,'\n','\n',start},
{comment,'\n'+1,127,comment},
{comment,1,'\n'-1,comment},
{comment,-128,-1,comment},// bytes outside 7-bit ASCII (negative when char is signed)
// A second '=' directly after '=' turns the assignment into an equality test.
{assign,'=','=',equal},
};
// Table of keywords (reserved words): each row pairs a keyword's spelling with
// the token type that stands for it. getnexttoken() takes the row count of
// this table via sizeof (its `sizereserve` variable).
// NOTE(review): `if` and `else` in the second column are reserved C keywords,
// not valid enumerator names, so the first two rows do not compile as written.
// They were presumably upper-case enumerators originally - confirm and rename
// together with the lextokentype declaration.
tokendes reserve[] ={
{"if",if},
{"else",else},
{"end",end},
{"repeat",repeat},
{"until",until},
};
lextoken* getnexttoken(char *psource)
{//考虑速度,不检测file合法性
static int lineno = 0;
static int posnow = 0;
static int sizestatetrans = sizeof(trans)/sizeof(transstate);
static int sizereserve = sizeof(reserve)/sizeof(tokendes);
static int sizetoken = sizeof(lextoken);
lextoken *ptoken = null;
lextokentype curstate = start;
int posstart = posnow;
int i,j,tokenlen=0;
char ch;
ptoken = (lextoken*)malloc(sizetoken);//省略错误检查
while (1)
{
ch = psource[posnow];
if (ch == '\n')//如果碰到回车字符,行号加1
lineno++;
for (i=0; i<sizestatetrans; i++)
{
if ((curstate==trans[i].beg) && (ch>=trans[i].domainbeg) && (ch<=trans[i].domainend))//满足该状态转换
{
curstate = trans[i].end;//转换到该状态
break;//跳出for循环,准备察看下一个字符
}
}
if (i == sizestatetrans)//未找到合适的状态转换
{
if (curstate == start)//如果开始于start状态
{
if (ch == '\0')//遇到文件尾
{