资源描述
编译原理课程设计报告
课题名称: C- Minus词法分析和语法分析设计
提交文档学生姓名: X X X
提交文档学生学号: XXXXXXXXXX
同组 成 员 名 单: X X X
指导 教 师 姓 名: X X
指导教师评阅成绩:
指导教师评阅意见:
.
.
提交报告时间:2015年6月10日
1. 课程设计目标
实验建立C-编译器。只含有扫描程序(scanner)和语法分析(parser)部分。
2. 分析与设计
C-编译器设计的整体框架,本实验实现扫描处理和语法分析程序(图中粗黑部分)。
2.1 、扫描程序scanner部分
2.1.1系统设计思想
设计思想:根据DFA图用switch-case结构实现状态转换。
惯用词法:
① 语言的关键字:else if int return void while
② 专用符号:+ - * / < <= > >= == != = ; , ( ) [ ] { } /* */
③ 其他标记是ID和NUM,通过下列正则表达式定义:
ID = letter letter*
NUM = digit digit*
letter = a|..|z|A|..|Z
digit = 0|..|9
大写和小写字母是有区别的
④ 空格由空白、换行符和制表符组成。空格通常被忽略,除了它必须分开ID、NUM关键字。
⑤ 注释用通常的C语言符号/ * . . . * /围起来。注释可以放在任何空白出现的位置(即注释不能放在标记内)上,且可以超过一行。注释不能嵌套
说明:当输入的字符使DFA到达接受状态的时候,则可以确定一个单词了。初始状态设置为START,当需要得到下一个token时,取得次token的第一个字符,并且按照DFA与对此字符的类型分析,转换状态。重复此步骤,直到DONE为止,输出token类型。当字符为“/”时,状态转换为SLAH再判断下一个字符,如果为“*”则继续转到INCOMMENT,最后以“*”时转到ENDCOMMENT状态,表明是注释,如果其他的则是字符停滞于当前字符,并且输出“/”。
2.1.2程序流程图
2.1.3 各文件或函数的设计说明
扫描程序用到:scanner.h,scanner.cpp
Ø scanner.h:声明词法状态,词法分析
//DFA中的状态
typedef enum
{
START = 1, INNUM, INID, INDBSYM, DONE
} DFAState;
//定义的Token的类型(31种),分别对应于else、if、int、return、void、while、+、-、*、/、<、<=、>、>=、==、!=、=、;、,、(、)、[、]、{、}、/*、*/、num、id、错误、结束
typedef enum
{
ELSE = 1,IF,INT,RETURN,VOID,WHILE,
PLUS,MINUS,TIMES,OVER,LT,LEQ,GT,GEQ,EQ,NEQ,ASSIGN,SEMI,COMMA,LPAREN,RPAREN,LMBRACKET,RMBRACKET,LBBRACKET,RBBRACKET,LCOMMENT,RCOMMENT,
NUM,ID,ERROR,ENDFILE
} TokenType;
//定义的Token结构体,包括类型、对应的串、所在代码的行号
struct Token
{
TokenType tokenType;
string tokenString;
int lineNo;
};
//每种TokenType对应的串,如tokenTypeString[ELSE]=="ELSE"
const string tokenTypeString[32] = {"OTHER", "ELSE", "IF", "INT", "RETURN", "VOID", "WHILE", "PLUS", "MINUS", "TIMES", "OVER", "LT", "LEQ", "GT", "GEQ", "EQ", "NEQ", "ASSIGN", "SEMI", "COMMA", "LPAREN", "RPAREN", "LMBRACKET", "RMBRACKET", "LBBRACKET", "RBBRACKET", "LCOMMENT", "RCOMMENT", "NUM", "ID", "ERROR", "ENDFILE"};
class Scanner:定义scanner.cpp中函数
Ø scanner.cpp文件函数说明
void Scanner :: scan():设置输出结果界面以及设置各种输出状态。
if(scanSuccess==false)
cout<<"词法分析出错!"<<endl;
else
cout<<"词法分析成功了!"<<endl;
printToken();/*输出Token到文件Token.txt中*/
//正在删除注释
void Scanner :: deleteComments()
TokenType Scanner :: returnTokenType(string s)//返回Token的类型
DFAState Scanner :: charType(char c)//返回字符的类型
typedef enum
{ ENDFILE,ERROR,
IF,ELSE,INT,RETURN,VOID,WHILE, //关键字
ID,NUM,
ASSIGN,PLUS,MINUS,TIMES,OVER,EQ,UEQ,LT,LPAREN,RPAREN,SEMI,BT,LQ,BQ,
DOU,LZGH,RZGH,LDGH,RDGH,//特殊字符:= + - * / == != < 等
} TokenType;
2.1.4 测试程序说明
根据附录A后面的例子,程序输入两个整数,计算并打印出它们的最大公因子,保存为a.txt。
/* A program to perform Eucild's
Algorithm to compute gcd. */
int gcd (int u, int v)
{
if (v==0)
return u;
else return
gcd(v,u-u/v*v); /* u-u/v*v== u mod v */
}
void main(void)
{
int x;
int y;
x=input();
y=input();
output(gcd(x,y));
}
2.2、语法分析parse部分
2.2.1系统设计思想
设计思想:parser用递归下降分析方法实现,通过调用词法分析函数getToken实现语法分析。
根据C-语言的规则,得出BNF语法如下:
1.program->declaration-list
2.declaration-list->declaration-list declaration | declaration
3.declaration->var-declaration|fun-declaration
4.var-declaration->type-specifier ID;|type-specfier ID[NUM]
5.type-specifier->int|void
6.fun-specifier ID(parans) compound-stmt
7.params->params-list|void
8.param-list->param-list,param|param
9.param->type-specifier ID|type-specifier ID []
pound-stmt->{local-declarations statement-list}
11.local-declarations->local-declarations var-declaration|empty
12.statement-list->statement-list statement|empty
13.statement->expression-stmt|compound-stmt|selection-stmt|iteration-stmt|return-stmt
14.expression-stmt->expression;|;
15.selection-stmt->if(expression)statement|if(expression)statement else statement
16.iteration-stmt->while(expression)statement
17.return-stmt->return ;|return expression;
18.expression->var=expression|simple-expression
19.var->ID|ID[expression]
20.simple-expression->additive-expression relop additive-expression|additive-expression
21.relop-><=|<|>|>=|==|!=
22.additive-expression->additive-expression addop term|term
23.addop->+|-
24.term->term mulop factor|factor
25.mulop->*|/
26.factor->(expression)|var|call|NUM
27.call->ID(args)
28.args->arg-list|empty
29.arg-list->arg-list,expression|expression
2.1.2语法分析程序流程图
2.1.3 各文件或函数的设计说明
语法分析程序包括:parser.cpp,parser.h
Ø parser.cpp:
Parser :: Parser()//界面设计
Token Parser :: getToken()//获取scanner中保存在TokenList数组中的Token,并且每次获取完之后数组下标指向下一个
void Parser :: syntaxError(string s)//出错处理
void Parser :: match(TokenType ex)//匹配出错
TreeNode * Parser :: declaration(void)//类型匹配错误
TreeNode * Parser :: param_list(TreeNode * k)//k可能是已经被取出来的VoidK,但又不是(void)类型的参数列表,所以一直传到param中去,作为其一个子节点
Ø parse.h:对parse.c的函数声明
//19种节点类型,分别表示int、id、void、数值、变量声明、数组声明、函数声明、函数声明参数列表、函数声明参数、复合语句体、if、while、return、赋值、运算、数组元素、函数调用、函数调用参数列表、未知节点
typedef enum {IntK, IdK, VoidK, ConstK, Var_DeclK, Arry_DeclK, FunK, ParamsK, ParamK, CompK, Selection_StmtK, Iteration_StmtK, Return_StmtK, AssignK, OpK, Arry_ElemK, CallK, ArgsK, UnkownK} Nodekind;
typedef enum {Void,Integer} ExpType;
ofstream fout_Tree("tokenTree.txt");//输出语法树到文件
//treeNode定义 包括子节点、兄弟节点、所处行号、节点类型、属性、表达式返回类型
typedef struct treeNode
TreeNode * newNode(Nodekind k);//根据节点类型新建节点
TreeNode * declaration_list(void);
TreeNode * declaration(void);
TreeNode * params(void);
TreeNode * param_list(TreeNode * k);
TreeNode * param(TreeNode * k);
TreeNode * compound_stmt(void);
TreeNode * local_declaration(void);
TreeNode * statement_list(void);
TreeNode * statement(void);
TreeNode * expression_stmt(void);
TreeNode * selection_stmt(void);
TreeNode * iteration_stmt(void);
TreeNode * return_stmt(void);
TreeNode * expression(void);
TreeNode * var(void);
TreeNode * simple_expression(TreeNode * k);
TreeNode * additive_expression(TreeNode * k);
TreeNode * term(TreeNode * k);
TreeNode * factor(TreeNode * k);
TreeNode * call(TreeNode * k);
TreeNode * args(void);
2.1.4 测试程序说明
根据附录A后面的例子,程序输入两个整数,计算并打印出它们的最大公因子,保存为a.txt。
/* A program to perform Eucild's
Algorithm to compute gcd. */
int gcd (int u, int v)
{
if (v==0)
return u;
else return
gcd(v,u-u/v*v); /* u-u/v*v== u mod v */
}
void main(void)
{
int x;
int y;
x=input();
y=input();
output(gcd(x,y));
}
3. 程序代码实现
按文件列出主要程序代码, 添加必要的注释.
Scanner.cpp:
#include <cstdlib>
#include <iostream>
#include <fstream>
#include <string>
#include "scanner.h"
#include<vector>
using namespace std;
/*
Name: 词法分析器
Copyright:
Author: XXX
Date: 19-05-14 12:00
Description: 提取出token
*/
Scanner :: Scanner()
{
scanSuccess = true;
charIndex = 0;
str = "";
commentFlag = true;
sourseString = "";
lineCount = 0;
}
void Scanner :: scan()
{
cout<<"开始词法分析..."<<endl;
bool doubleSym = false;
getSourseStringFromFile("sourseFile.txt");
int state = START;
lineCount = 0;
char ch;
while(state<6)
{
ch = getNextChar();
if('\0'==ch)
{
Token t;
t.lineNo = lineCount;
t.tokenString = "";
t.tokenType = ENDFILE;
tokenList.push_back(t);
break;
}
if(START==state)//初始状态和空格
{
state = charType(ch);
if(state!=START)
str += ch;
}
else if(INNUM==state)//digit
{
state = charType(ch);
if(state!=INNUM)
state = DONE;
else
str += ch;
}
else if(INID==state)//letter
{
state = charType(ch);
if(state!=INID)
state = DONE;
else
str += ch;
}
else if(INDBSYM==state)//除了<>=!之外的各种符号
{
if('='==ch)
{
str += ch;
doubleSym = true;
}
else
doubleSym = false;
state = DONE;
}
if(DONE==state)//接收状态
{
int tp = 0;
if('\n'==ch)
tp = 1;
Token t;
t.lineNo = lineCount-tp;
t.tokenString = str;
t.tokenType = returnTokenType(str);
tokenList.push_back(t);
if(ERROR==t.tokenType)
scanSuccess = false;
int lastState = charType(str[str.length()-1]);
if(lastState==INNUM || lastState==INID || (lastState==INDBSYM && doubleSym==false))
backToLastChar();
str = "";
state = START;
if(doubleSym==true)
doubleSym = false;
}
}
if(scanSuccess==false)
cout<<"词法分析出错!"<<endl;
else
cout<<"词法分析成功了!"<<endl;
printToken();//输出Token到文件Token.txt中
}
Token Scanner :: getTokenAt(int tokenIndex)
{
Token token;
token.lineNo = lineCount;
token.tokenString = "";
token.tokenType = ENDFILE;
if(tokenIndex<tokenList.size())
{
token = tokenList.at(tokenIndex++);
}
return token;
}
void Scanner :: getSourseStringFromFile(string path)
{
ifstream fin(path.c_str());
string temp;
sourseString = "";
while(getline(fin,temp))
{
sourseString += temp;
sourseString += '\n';
}
fin.close();
charIndex = 0;
}
void Scanner :: deleteComments()
{
cout<<"正在删除注释..."<<endl;
ofstream fout_Sourse("sourseFile.txt");
int state = 1;
char ch;
while(state<6)
{
ch = getNextChar();
if('\0'==ch)//文件结束
break;
if(1==state)
{
if('/'==ch)
state = 2;
else
{
state = 1;
fout_Sourse<<ch;
}
}
else if(2==state)
{
if('*'==ch)
{
state = 3;
commentFlag = false;
}
else
{
state = 1;
fout_Sourse<<"/"<<ch;
}
}
else if(3==state)
{
if('*'==ch)
state = 4;
else
{
state = 3;
}
}
else if(4==state)
{
if('*'==ch)
state = 4;
else if('/'==ch)
state = 5;
else
{
state = 3;
}
}
if(5==state)//结束状态,处理
{
commentFlag = true;
state = 1;
}
}
if(!commentFlag)
{
cout<<"注释错误,没有结束符!"<<endl;
scanSuccess = false;
}
else
cout<<"注释已经成功删除!"<<endl;
}
TokenType Scanner :: returnTokenType(string s)//返回Token的类型
{
TokenType t;
if(s=="else")
{
t = ELSE;
}
else if(s=="if")
{
t = IF;
}
else if(s=="int")
{
t = INT;
}
else if(s=="return")
{
t = RETURN;
}
else if(s=="void")
{
t = VOID;
}
else if(s=="while")
{
t = WHILE;
}
else if(s=="+")
{
t = PLUS;
}
else if(s=="-")
{
t = MINUS;
}
else if(s=="*")
{
t = TIMES;
}
else if(s=="/")
{
t = OVER;
}
else if(s=="<")
{
t = LT;
}
else if(s=="<=")
{
t = LEQ;
}
else if(s==">")
{
t = GT;
}
else if(s==">=")
{
t = GEQ;
}
else if(s=="==")
{
t = EQ;
}
else if(s=="!=")
{
t = NEQ;
}
else if(s=="=")
{
t = ASSIGN;
}
else if(s==";")
{
t = SEMI;
}
else if(s==",")
{
t = COMMA;
}
else if(s=="(")
{
t = LPAREN;
}
else if(s==")")
{
t = RPAREN;
}
else if(s=="[")
{
t = LMBRACKET;
}
else if(s=="]")
{
t = RMBRACKET;
}
else if(s=="{")
{
t = LBBRACKET;
}
else if(s=="}")
{
t = RBBRACKET;
}
else if(s=="/*")
{
t = LCOMMENT;
}
else if(s=="*/")
{
t = RCOMMENT;
}
else if(2==charType(s[s.length()-1]))
{
t = NUM;
}
else if(3==charType(s[s.length()-1]))
{
t = ID;
}
else
{
t = ERROR;
}
return t;
}
DFAState Scanner :: charType(char c)//返回字符的类型
{
if(' '==c || '\n'==c || '\t'==c ||'\r'==c)
return START;
else if(c>='0'&&c<='9')
return INNUM;
else if((c>='A'&&c<='Z')||(c>='a'&&c<='z'))
return INID;
else if(c=='<' || c=='>' || c=='=' || c=='!')
return INDBSYM;
else
return DONE;
}
char Scanner :: getNextChar()
{
if(charIndex<sourseString.length())
{
char ch = sourseString[charIndex];
charIndex++;
if('\n'==ch)
lineCount++;
return ch;
}
else
return '\0';
}
void Scanner :: backToLastChar()
{
if(charIndex>0)
{
char ch = sourseString[charIndex-1];
charIndex--;
if('\n'==ch)
lineCount--;
}
}
void Scanner :: printToken()
{
ofstream fout_Token("Token.txt");
ifstream fin("sourseFile.txt");
string temp;
int lineCount = 0;
int index = 0;
while(getline(fin,temp))
{
fout_Token<<lineCount<<": ";
fout_Token<<temp<<endl;
while(index<tokenList.size())
{
Token t = tokenList.at(index);
if(lineCount==t.lineNo)
{
fout_Token<<" "<<lineCount<<": [";
index++;
int width = 10;
string headS = " ";
if(t.tokenType>=1&&t.tokenType<=6)//关键字
{
string tp = "";
for(int i = 0; i<width-t.tokenString.length(); i++)
tp += " ";
fout_Token<<"keyWord]:"<<headS<<t.tokenString<<" "<<tp<<"["<<tokenTypeString[t.tokenType]<<"]"<<endl;
}
else if(t.tokenType>=7&&t.tokenType<=27)//符号
{
string tp = "";
for(int i = 0; i<width-t.tokenString.length(); i++)
tp += " ";
fout_Token<<"symbols]:"<<headS<<t.tokenString<<" "<<tp<<"["<<tokenTypeString[t.tokenType]<<"]"<<endl;
}
else if(t.tokenType==28)//NUM
{
string tp = "";
for(int i = 0; i<width-t.tokenString.length(); i++)
tp += " ";
fout_Token<<" NUM]:"<<headS<<t.tokenString<<" "<<tp<<"["<<tokenTypeString[t.tokenType]<<"]"<<endl;
}
else if(t.tokenType==29)//ID
{
string tp = "";
for(int i = 0; i<width-t.tokenString.length(); i++)
tp += " ";
fout_Token<<" ID]:"<<headS<<t.tokenString<<" "<<tp<<"["<<tokenTypeString[t.tokenType]<<"]"<<endl;
}
else if(t.tokenType==30)//错误
{
string tp = "";
for(int i = 0; i<width-t.tokenString.length(); i++)
tp += " ";
fout_Token<<" error]:"<<headS<<t.tokenString<<" "<<tp<<"["<<tokenTypeString[t.tokenType]<<"]"<<endl;
}
else if(t.tokenType==ENDFILE)//结束
{
fout_Token<<" "<<lineCount<<": ";
fout_Token<<t.tokenString<<" "<<"["<<tokenTypeString[t.tokenType]<<"]"<<endl;
}
}
if(lineCount<t.lineNo)
break;
}
lineCount++;
}
fin.close();
fout_Token.close();
}
scanner.h:
#include<string>
#include<vector>
using namespace std;
//定义的Token的类型(31种),分别对应于else、if、int、return、void、while、+、-、*、/、<、<=、>、>=、==、!=、=、;、,、(、)、[、]、{、}、/*、*/、num、id、错误、结束
typedef enum
{
ELSE = 1,IF,INT,RETURN,VOID,WHILE,
PLUS,MINUS,TIMES,OVER,LT,LEQ,GT,GEQ,EQ,NEQ,ASSIGN,SEMI,COMMA,LPAREN,RPAREN,LMBRACKET,RMBRACKET,LBBRACKET,RBBRACKET,LCOMMENT,RCOMMENT,
NUM,ID,ERROR,ENDFILE
} TokenType;
typedef enum
{
START = 1, INNUM, INID, INDBSYM, DONE
} DFAState;
//定义的Token结构体,包括类型、对应的串、所在代码的行号
st
展开阅读全文