资源描述
编译原理课程设计报告
课题名称: 编译原理课程设计
C-语言词法与语法分析器的实现
提交文档学生姓名:
提交文档学生学号:
同组 成 员 名 单:
指导 教 师 姓 名:
指导教师评阅成绩:
指导教师评阅意见:
.
.
提交报告时间: 年 月 日
C-词法与语法分析器的实现
1.课程设计目标
(1)题目实用性
C-语言拥有一个完整语言的基本属性,通过编写C-语言的词法分析和语法分析,对于理解编译原理的相关理论和知识有很大的作用。通过编写C-语言词法和语法分析程序,能够对编译原理的相关知识:正则表达式、有限自动机、语法分析等有一个比较清晰的了解和掌握。
(2)C-语言的词法说明
① 语言的关键字:
else if int return void while
所有的关键字都是保留字,并且必须是小写。
②专用符号:
+ - * / < <= > >= == != = ; , ( ) [ ] { } /* */
③其他标记是ID和NUM,通过下列正则表达式定义:
ID = letter letter*
NUM = digit digit*
letter = a|..|z|A|..|Z
digit = 0|..|9
注:ID表示标识符,NUM表示数字,letter表示一个字母,digit表示一个数字。
小写和大写字母是有区别的。
④ 空格由空白、换行符和制表符组成。空格通常被忽略。
⑤ 注释用通常的c语言符号/ * . . . * /围起来。注释可以放在任何空白出现的位置(即注释不能放在标记内)上,且可以超过一行。注释不能嵌套。
(3)程序设计目标
能够对一个程序正确的进行词法及语法分析。
2.分析与设计
(1)设计思想
a. 词法分析
词法分析的实现主要利用有穷自动机理论。有穷自动机可用作描述在输入串中识别模式的过程,因此也能用作构造扫描程序。通过有穷自动机理论能够容易的设计出词法分析器。
b. 语法分析
语法分析采用递归下降分析。递归下降法是语法分析中最易懂的一种方法。它的主要原理是,对每个非终结符按其产生式结构构造相应语法分析子程序,其中终结符产生匹配命令,而非终结符则产生过程调用命令。因为文法递归相应子程序也递归,所以称这种方法为递归子程序下降法或递归下降法。其中子程序的结构与产生式结构几乎是一致的。
(2)程序流程图
程序主流程图:
词法分析: 语法分析:
读取程序
读取程序
进行递归下降分析匹配或建立树
对输入的字符进行匹配判断
对应输出各行代码的词法分析结果
输出程序对应的语法树
词法分析子流程图:
Start
否
是
Num
是否为dight
是否为num
否
否
是
ID
是否为alpha
是否为id
否
是
是否为=
是否为>,<.,!,=
单符号
否
否
是
双符号
是否为+,-,*等专用符号
是
是
专用符号
否
是
否
是
是否为/
是否为*
是否为*
是否为/
否
否
是
错误
结果over
语法分析子流程图:
3.程序代码实现
整个词法以及语法的程序设计在一个工程里面,一共包含了8个文件,分别为main.cpp、parse.cpp、scan.cpp、util.cpp、scan.h、util.h、 globals.h、parse.h,其中scan.cpp和scan.h为词法分析程序。
以下仅列出各个文件中的核心代码:
Main.cpp
#include "globals.h"
#define NO_PARSE FALSE
#include "util.h"
#if NO_PARSE
#include "scan.h"
#else
#include "parse.h"
#endif
int lineno=0;
FILE * source;
FILE * listing;
FILE * code;
int EchoSource = TRUE;
int TraceScan=TRUE;
int TraceParse=TRUE;
int Error = FALSE;
int main(int argc,char * argv[])
{
TreeNode * syntaxTree;
char pgm[120];
scanf("%s",pgm);
source=fopen(pgm,"r");
if(source==NULL)
{
fprintf(stderr,"File %s not found\n",pgm);
exit(1);
}
listing = stdout;
fprintf(listing,"\nC COMPILATION: %s\n",pgm);
#if NO_PARSE
while(getToken()!=ENDFILE);
#else
syntaxTree = parse();
if(TraceParse){
fprintf(listing,"\nSyntaxtree:\n");
printTree(syntaxTree);
}
#endif
fclose(source);
return 0;
}
Parse.cpp
#include "globals.h"
#include "util.h"
#include "scan.h"
#include "parse.h"
static TokenType token; /* holds current token */
/* function prototypes for recursive calls */
static TreeNode * declaration_list(void);
static TreeNode * declaration(void);
static TreeNode * params(void);
static TreeNode * param_list(void);
static TreeNode * param(void);
static TreeNode * compound_stmt(void);
static TreeNode * local_declarations(void);
static TreeNode * statement_list(void);
static TreeNode * statement(void);
static TreeNode * expression_stmt(void);
static TreeNode * if_stmt(void);
static TreeNode * while_stmt(void);
static TreeNode * return_stmt(void);
static TreeNode * expression(void);
static TreeNode * var(void);
static TreeNode * simple_exp(void);
static TreeNode * additive_expression(void);
static TreeNode * term(void);
static TreeNode * factor(void);
static TreeNode * args(void);
static TreeNode * arg_list(void);
static void syntaxError(char * message)
{ fprintf(listing,"\n>>> ");
fprintf(listing,"Syntax error at line %d: %s",lineno,message);
Error = TRUE;
}
/*判断读取的字符*/
static void match(TokenType expected)
{
if(token==expected)
{
token=getToken( );
}
else
{
syntaxError("unexpected token -> ");
printToken(token,tokenString);
fprintf(listing," ");
}
}
/*进行语法分析,构建语法树*/
TreeNode * declaration_list(void)
{
TreeNode * t= declaration();
TreeNode * p= t;
while ((token==INT) || (token==VOID) )
{
TreeNode *q = declaration();
if (q!=NULL) {
if (t==NULL) t = p = q;
else /* now p cannot be NULL either */
{
p->sibling = q;
p = q;
}
}
}
return t;
}
TreeNode * declaration(void)
{ TreeNode * t = NULL;
switch (token)
{
case VOID :
case INT :
t = newStmtNode(DecK);
if(token == INT)
t->type =Integer;
else
t->type = Void;
match(token);
switch (token)
{
case ID:
t->attr.name = copyString(tokenString);
t->kind.stmt = VarDK;
match(ID);
switch (token)
{
case LZKH:
t->kind.stmt = VarDK;
t->type = IntArray;
match(LZKH);
match(NUM);
match(RZKH);
match(SEMI);
break;
case LPAREN:
t->kind.stmt = FunDK;
match(LPAREN);
t->child[0] = params();
match(RPAREN);
t->child[1] = compound_stmt();
break;
default: match(SEMI);break;
}
break;
default:syntaxError("unexpected token -> ");
printToken(token,tokenString);
token = getToken();
break;
}
break;
default : syntaxError("unexpected token -> ");
printToken(token,tokenString);
token = getToken();
break;
} /* end case */
return t;
}
TreeNode * params(void)
{
TreeNode * t = NULL;
if(token == VOID)
{
match(token);
t = newStmtNode(ParamList);
t->child[0] = newStmtNode(ParamK);
t->child[0]->type = Void;
}
else if(token == RPAREN)
t=NULL;
else
{
t = param_list();
}
return t;
}
TreeNode * param_list(void)
{
TreeNode * t = newStmtNode(ParamList);
int i = 1;
t->child[0] = param();
while(token != RPAREN)
{
match(DOT);
t->child[i] = param();
i++;
}
return t;
}
TreeNode * param(void)
{
TreeNode * t = NULL;
match(INT);
t= newStmtNode(ParamK);
t->type=Integer;
t->attr.name=copyString(tokenString);
match(ID);
if(token == LZKH)
{
t->type=IntArray;
match(LZKH);
match(RZKH);
}
return t;
}
TreeNode * compound_stmt(void)
{
TreeNode * t = newStmtNode(ComK);
match(LDKH);
t->child[0] = local_declarations();
t->child[1] = statement_list();
match(RDKH);
return t;
}
TreeNode * local_declarations(void)
{
TreeNode * t = newStmtNode(LocalDecK);
int i=0;
while(token == INT || token == VOID)
{
t->child[i] = declaration();
i++;
}
return t;
}
TreeNode * statement_list(void)
{
TreeNode * t = newStmtNode(StmtList);
int i=0;
while(token != RDKH)
{
t->child[i] =statement();
i++;
}
return t;
}
TreeNode * statement(void)
{
TreeNode * t ;
switch (token) {
case IF : t = if_stmt(); break;
case WHILE : t = while_stmt(); break;
case ID :
case SEMI:
t = expression_stmt(); break;
case RETURN : t = return_stmt(); break;
case LDKH : t=compound_stmt();break;
default : syntaxError("unexpected token -> ");
printToken(token,tokenString);
token = getToken();
break;
} /* end case */
return t;
}
TreeNode * expression_stmt(void)
{
TreeNode * t = newStmtNode(ExpstmtK);
if(token == SEMI)
match(SEMI);
else
{
t = expression();
match(SEMI);
}
return t;
}
TreeNode * if_stmt(void)
{
TreeNode * t = newStmtNode(IfK);
if(t!=NULL)
{
match(IF);
match(LPAREN);
t->child[0] = expression();
match(RPAREN);
t->child[1] = statement();
if (token==ELSE)
{
match(ELSE);
if (t!=NULL) t->child[2] = newStmtNode(ElseK);
t->child[2]->child[0] = statement();
} }
return t;
}
TreeNode * while_stmt(void)
{
TreeNode * t = newStmtNode(WhileK);
match(WHILE);
match(LPAREN);
if (t!=NULL) t->child[0] = expression();
match(RPAREN);
if (t!=NULL) t->child[1] = statement();
return t;
}
TreeNode * return_stmt(void)
{
TreeNode * t = newStmtNode(RetK);
if(token == RETURN)
match(RETURN);
if(token == SEMI)
match(SEMI);
else
{
t->child[0] = expression();
match(SEMI);
}
return t;
}
TreeNode * expression(void)
{
TreeNode * t = simple_exp();
return t;
}
TreeNode* var(void)
{
TreeNode* t = newExpNode(IdK);
if ((t!=NULL) && (token==ID))
t->attr.name = copyString(tokenString);
match(ID);
if(token == LZKH)
{
match(token);
t->type = ArrayUnit;
t->child[0] = expression();
match(RZKH);
}
return t;
}
TreeNode * simple_exp(void)
{
TreeNode * t = additive_expression();
if(t!=NULL){
if (token == LT || token == LE|| token == MT || token == ME||token ==EQ||token ==NEQ)
{
TreeNode * p = newExpNode(OpK);
if(p!=NULL)
{
p->attr.op = token;
p->child[0] = t;
match(token);
p->child[1] = additive_expression();
t=p;
}
}
}
return t;
}
TreeNode* additive_expression(void)
{
TreeNode * t = term();
while(token == PLUS || token == MINUS)
{
TreeNode * p = newExpNode(OpK);
p->attr.op = token;
p->child[0] = t;
match(token);
p->child[1] = term();
t = p;
}
return t;
}
TreeNode * term(void)
{
TreeNode * t = factor();
while ((token==TIMES)||(token==OVER))
{
TreeNode * p = newExpNode(OpK);
if (p!=NULL) {
p->child[0] = t;
p->attr.op = token;
match(token);
p->child[1] = factor();
t = p;
}
}
return t;
}
TreeNode * factor(void)
{
TreeNode * t = NULL;
switch (token)
{
case NUM :
t = newExpNode(ConstK);
if ((t!=NULL) && (token==NUM))
t->attr.val = atoi(tokenString);
match(NUM);
break;
case ID :
t = var();
if (token == ASSIGN)
{
TreeNode* p = newStmtNode(AssignK);
p->attr.name = t->attr.name;
match(token);
p->child[0] = expression();
t = p;
}
if (token == LPAREN )
{
TreeNode * p = newStmtNode(CallK);
p->attr.name = t->attr.name;
t=p;
match(token);
p->child[0] = args();
match(RPAREN);
}
break;
case LPAREN :
match(LPAREN);
t = expression();
match(RPAREN);
break;
default:
syntaxError("unexpected token -> ");
printToken(token,tokenString);
token = getToken();
break;
}
return t;
}
TreeNode * args(void)
{
TreeNode * t = newStmtNode(ArgList);
if(token != RPAREN)
{
t->child[0] = arg_list();
return t;
}else
return NULL;
}
TreeNode * arg_list(void)
{
TreeNode * t = newStmtNode(ArgK);
int i = 1;
if(token != RPAREN)
t->child[0] = expression();
while(token!=RPAREN)
{
match(DOT);
t->child[i] = expression();
i++;
}
return t;
}
TreeNode * parse(void)
{ TreeNode * t;
token = getToken();
t =declaration_list();
if (token!=ENDFILE)
syntaxError("Code ends before file\n");
return t;
}
scan.cpp
#include "globals.h"
#include "util.h"
#include "scan.h"
/*对扫描的字符进行匹配判断*/
TokenType getToken(void)
{ /* index for storing into tokenString */
int tokenStringIndex = 0;
/* holds current token to be returned */
TokenType currentToken;
/* current state - always begins at START */
StateType state = START;
/* flag to indicate save to tokenString */
int save;
while (state != DONE)
{ int c = getNextChar();
save = TRUE;
switch (state)
{ case START:
if (isdigit(c))
state = INNUM;
else if (isalpha(c))
state = INID;
else if (c == '=')
state = INEQUAL;
else if (c == '<')
state = INLE;
else if (c == '>')
state = INME;
else if ((c == ' ') || (c == '\t') || (c == '\n'))
save = FALSE;
else if (c== '!')
state = INNEQ;
else if (c == '/')
{
if(getNextChar()!='*')
{
ungetNextChar();
state = DONE;
currentToken = OVER;
break;
}
else
{
save = FALSE;
state = INCOMMENT;
}
}
else
{ state = DONE;
switch (c)
{ case EOF:
save = FALSE;
currentToken = ENDFILE;
break;
case '+':
currentToken = PLUS;
break;
case '-':
currentToken = MINUS;
break;
case '*':
currentToken = TIMES;
break;
case '(':
currentToken = LPAREN;
break;
case ')':
currentToken = RPAREN;
break;
case ';':
currentToken = SEMI;
break;
case '[':
currentToken=LZKH;
break;
case ']':
currentToken=RZKH;
break;
case '{':
currentToken=LDKH;
break;
case '}':
currentToken=RDKH;
break;
case ',':
currentToken=DOT;
break;
default:
currentToken = ERROR;
break;
}
}
break;
case INCOMMENT:
save = FALSE;
if (c == EOF)
{
state = DONE;
currentToken = ERROR;
}
else if(c=='*')
{
if(getNextChar()=='/')
{
state = START;
}
else
{
ungetNextChar();
}
}
break;
case INNEQ:
state=DONE;
if(c=='=')
currentToken=NEQ;
else
{
ungetNextChar();
save=FALSE;
currentToken=ERROR;
}
break;
case INEQUAL:
state = DONE;
if (c == '=')
currentToken = EQ;
else
{ /* backup in the input */
ungetNextChar();
currentToken = ASSIGN;
}
break;
case INNUM:
if (!isdigit(c))
{ /* backup in the input */
ungetNextChar();
save = FALSE;
state = DONE;
currentToken = NUM;
}
break;
case INID:
if (!isalpha(c))
{ /* backup in the input */
ungetNextChar();
save = FALSE;
state = DONE;
currentToken = ID;
}
break;
case INLE:
state = DONE;
if(c== '=')
currentToken = LE;
else
{ /* backup in the input */
ungetNextChar();
currentToken = LT;
}
break;
case INME:
state = DONE;
if(c== '=')
currentToken = ME;
else
{ /* backup in the input */
ungetNextChar();
currentToken = MT;
}
break;
case DONE:
default: /* should never happen */
fprintf(listing,"Scanner Bug: state= %d\n",state);
state = DONE;
currentToken = ERROR;
break;
}
if ((save) && (tokenStringIndex <= MAXTOKENLEN
展开阅读全文