资源描述
编译原理-词法分析器设计
一. 设计说明及设计要求
一般来说,编译程序的整个过程可以划分为五个阶段:词法分析、语法分析、中间代码生成、优化和目标代码生成。本课程设计即为词法分析阶段。词法分析阶段是编译过程的第一个阶段。这个阶段的任务是从左到右一个字符一个字符地读入源程序,对构成源程序的字符流进行扫描和分解,从而识别出一个个单词(也称单词符号或符号),如保留字(关键字或基本字)、标识符、常数、算符和界符等等。
二. 设计中有关关键字说明
1. 基本字:也称关键字,如C语言中 if ,else ,while ,do ,for,case,break,return 等。
2. 标识符:用来表示各种名字,如常量名、变量名和过程名等。
3. 常数:各种类型常数,如12,6.88,和“ABC”等。
4. 运算符:如 + ,- ,* ,/ ,%,< ,> ,<= ,>= 等。
5. 界符,如逗点,冒号,分号,括号,# ,〈〈 , 〉〉等。
三 、程序分析
词法分析是编译的第一个阶段,它的主要任务是从左到右逐个字符地对源程序进行
扫描,产生一个个单词序列,用于语法分析。词法分析工作可以是独立的一遍,把字符流的源程序变为单词序列,输出在一个中间文件上,这个文件作为语法分析程序的输入而继续编译过程。然而,更一般的情况是,常将词法分析程序设计成一个子程序,每当语法分析程序需要一个单词时,则调用该子程序。词法分析程序每得到一次调用,便从源程序文件中读入一些字符,直到识别出一个单词,或者说直到下一个单词的第一个字符为止。
四 、模块设计
下面是程序流程图
五 、程序简介
在程序当前目录里建立一个文本文档,取名为infile.txt,所有需要分析的程序都写在此文本文档里,程序结尾必须以“@”符号结束。程序结果输出在同一个目录下,文件名为outfile.txt,此文件为自动生成。本程序所输出的单词符号采用如下二元式表示:(单词种别,单词自身的值),如程序输出结果 (62,#)(1,include)(35,<)(1,iostream) 等。
程序功能:(1) 能识别C语言中所有关键字(共32个)(单词种别为 3 — 30 及 43 — 49,详情见程序代码相关部分,下同)
(2) 能识别C语言中自定义标识符 (单词种别为 1)
(3) 能识别C语言中常数 (单词种别为 2)
(4) 能识别C语言中常用运算符(单词种别为 31 — 42 及 59、60)
(5) 能识别C语言中绝大多数界符 (单词种别为 52 — 58 及 61 — 65)
六 、运行结果
输入文件infile.txt
运行结果(输出文件 outfile.txt)
七 、设计体会
八、附录部分(程序代码)
单词符号
类别编码
单词符号
类别编码
单词符号
类别编码
单词符号
类别编码
if
3
float
21
+
31
#
62
then
4
short
22
-
32
.
63
else
5
unsigned
23
*
33
,
64
while
6
continue
24
/
34
:
65
do
7
for
25
<
35
>=
39
begin
8
signed
26
>
36
<=
38
end
9
void
27
=
37
==
41
long
10
default
28
:=
51
!=
42
switch
11
goto
29
(
52
%
40
case
12
sizeof
30
)
53
标记符
1
enum
13
volatile
43
;
54
常数
2
register
14
auto
44
[
55
typedef
15
double
45
]
56
char
16
int
46
{
57
extern
17
struct
47
}
58
return
18
break
48
<<
59
union
19
static
49
>>
60
const
20
“
61
提示:文件打开和读写函数:
FILE *fp,*out; //定义文件指针
fp=fopen("infile.txt","r");
如果打开文件"infile.txt"失败,则函数返回NULL,即fp=NULL;第二个参数"r"表示以只读方式打开,如果为"w",则以可写方式打开
调用fgetc(fp)这个函数一次,从fp所指向的文件读取一个字符(返回值为int,读到文件末尾时返回EOF)
int ch=fgetc(fp);
向文件写字符的函数为fprintf(FILE *fp, 写入内容)
例如下面调用fprintf(outfile,"abcd\n")是把字符串“abcd”写到文件outfile的末尾,并且在后面加上了一个换行符
文件读写完毕后要用函数fclose(fp)关闭。
源代码
#include "stdio.h"
#include "string.h"
#include "ctype.h"
/*
 * Classify one word token (letters, digits, underscores) and write the
 * resulting "(category,text)" pair, followed by a newline, to `output`.
 *
 *   - A token starting with a letter or '_' is looked up in the keyword
 *     table; a hit prints the keyword's category code, a miss prints
 *     category 1 (identifier).
 *   - A token made only of decimal digits prints category 2 (constant).
 *   - Anything else (e.g. "12ab") is reported as undefined.
 *   - An empty token produces no output.
 *
 * output: open, writable stream that receives the result line.
 * p:      NUL-terminated token text; it is only read, never modified.
 */
void analzid(FILE *output,char *p)
{
    /* Keyword spellings with the category codes used by this analyser. */
    static const struct {
        const char *word;
        int code;
    } keywords[] = {
        {"if", 3},        {"then", 4},      {"else", 5},
        {"while", 6},     {"do", 7},        {"begin", 8},
        {"end", 9},       {"long", 10},     {"switch", 11},
        {"case", 12},     {"enum", 13},     {"register", 14},
        {"typedef", 15},  {"char", 16},     {"extern", 17},
        {"return", 18},   {"union", 19},    {"const", 20},
        {"float", 21},    {"short", 22},    {"unsigned", 23},
        {"continue", 24}, {"for", 25},      {"signed", 26},
        {"void", 27},     {"default", 28},  {"goto", 29},
        {"sizeof", 30},   {"volatile", 43}, {"auto", 44},
        {"double", 45},   {"int", 46},      {"struct", 47},
        {"break", 48},    {"static", 49}
    };
    size_t i;

    /* Nothing to classify; avoids printing a bogus "(2,)" pair. */
    if (p[0] == '\0') {
        return;
    }

    /*
     * A leading underscore starts an identifier exactly like a leading
     * letter does, so "_1x" and "__tmp" are identifiers too.  The cast to
     * unsigned char keeps the <ctype.h> call defined for negative chars.
     */
    if (isalpha((unsigned char)p[0]) || p[0] == '_') {
        for (i = 0; i < sizeof keywords / sizeof keywords[0]; i++) {
            if (strcmp(p, keywords[i].word) == 0) {
                fprintf(output, "(%d,%s)\n", keywords[i].code, p);
                return;
            }
        }
        fprintf(output, "(1,%s)\n", p);
        return;
    }

    /* Not an identifier: it is a constant only if every char is a digit. */
    for (i = 0; p[i] != '\0'; i++) {
        if (!isdigit((unsigned char)p[i])) {
            fprintf(output, "%s 未定义\n", p);
            return;
        }
    }
    fprintf(output, "(2,%s)\n", p);
}
/*
 * Classify one operator/delimiter token and write its "(category,text)"
 * pair, followed by a newline, to `outfile`.
 *
 * A token consisting of a single whitespace character is skipped without
 * output.  A token that is not in the symbol table is reported as
 * undefined.
 *
 * outfile: open, writable stream that receives the result line.
 * p:       NUL-terminated token text (one or two characters as produced
 *          by the scanner); it is only read, never modified.
 */
void analzsy(FILE *outfile,char *p)
{
    /* Symbol spellings with the category codes used by this analyser. */
    static const struct {
        const char *text;
        int code;
    } symbols[] = {
        {"+", 31},  {"-", 32},  {"*", 33},  {"/", 34},
        {"<", 35},  {">", 36},  {"=", 37},  {"<=", 38},
        {">=", 39}, {"%", 40},  {"==", 41}, {"!=", 42},
        {"(", 52},  {")", 53},  {";", 54},  {"[", 55},
        {"]", 56},  {"{", 57},  {"}", 58},  {"<<", 59},
        {">>", 60},
        /* NOTE(review): code 61 is matched against the single quote here;
         * confirm whether the double quote was intended instead. */
        {"'", 61},
        {"#", 62},
        {".", 63},  /* was emitted as 64, colliding with ',' */
        {",", 64},  {":", 65}
    };
    size_t i;

    /* Whitespace separates tokens and carries no category of its own;
     * tabs and carriage returns are skipped like spaces and newlines. */
    if (p[0] != '\0' && p[1] == '\0' && isspace((unsigned char)p[0])) {
        return;
    }

    for (i = 0; i < sizeof symbols / sizeof symbols[0]; i++) {
        if (strcmp(p, symbols[i].text) == 0) {
            /* The text is passed as an argument, never as part of the
             * format, so "%" cannot be taken for a conversion. */
            fprintf(outfile, "(%d,%s)\n", symbols[i].code, p);
            return;
        }
    }
    fprintf(outfile, "%s 未定义\n", p);
}
/* Capacity of the word buffer, including the terminating NUL. */
enum { LEX_WORD_MAX = 256 };

/* True when `c` can be part of a word token: ASCII letter, digit or '_'. */
static int lex_is_word_char(int c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
           (c >= '0' && c <= '9') || c == '_';
}

/* Emit the pending word (if any) through analzid and reset the buffer. */
static void lex_flush_word(FILE *out, char *word, size_t *len)
{
    if (*len > 0) {
        word[*len] = '\0';
        analzid(out, word);
        *len = 0;
    }
}

/*
 * Scan "infile.txt" and write one classified token per line to
 * "outfile.txt".
 *
 * Input is read one character at a time until the '@' end marker or end
 * of file, whichever comes first.  Runs of letters, digits and '_' are
 * collected into a word and handed to analzid; every other character is
 * handed to analzsy, pairing it with the next character when together
 * they form one of  !=  ==  >=  <=  <<  >> .
 *
 * Returns 0 on success and 1 when a file cannot be opened or written.
 */
int main(void)
{
    FILE *fp;
    FILE *out;
    char idstr[LEX_WORD_MAX];
    size_t idlen = 0;
    int ch;   /* int, not char, so that EOF is distinguishable from data */

    fp = fopen("infile.txt", "r");
    if (fp == NULL) {
        printf("Can not open infile!\n");
        return 1;
    }
    out = fopen("outfile.txt", "w");
    if (out == NULL) {
        printf("Can not open outfile!\n");
        fclose(fp);
        return 1;
    }

    while ((ch = fgetc(fp)) != EOF && ch != '@') {
        char systr[3];   /* up to two operator characters plus NUL */

        if (lex_is_word_char(ch)) {
            /* Characters beyond the buffer capacity are dropped rather
             * than written out of bounds. */
            if (idlen < sizeof idstr - 1) {
                idstr[idlen++] = (char)ch;
            }
            continue;
        }

        /* A non-word character ends the word collected so far. */
        lex_flush_word(out, idstr, &idlen);

        systr[0] = (char)ch;
        systr[1] = '\0';
        systr[2] = '\0';

        if (ch == '!' || ch == '=' || ch == '<' || ch == '>') {
            int next = fgetc(fp);
            int is_pair = (next == '=') ||
                          (ch == '<' && next == '<') ||
                          (ch == '>' && next == '>');

            if (is_pair) {
                systr[1] = (char)next;
            } else if (next != EOF) {
                /* Not part of this operator: hand it back to the stream. */
                ungetc(next, fp);
            }
        }
        analzsy(out, systr);
    }

    /* A word that runs right up to '@' or end of file must still be
     * emitted. */
    lex_flush_word(out, idstr, &idlen);

    fprintf(out,"--------完毕--------");
    fclose(fp);
    /* fclose flushes buffered output, so a failure here means the result
     * file is incomplete. */
    if (fclose(out) != 0) {
        printf("Can not write outfile!\n");
        return 1;
    }
    printf("-----所有成果已经存入outfile.txt文档-----\n");
    return 0;
}
展开阅读全文