编译原理-词法分析器实验报告.doc

资源描述

一．实验题目和要求。题目：设计PL/0语言的词法分析器。要求：单词（关键字）：begin end if then else while write read do call const var procedure repeat until 运算符：+ - * / := = <>(#) < <= > >= 界符：, . ; ( ) 标识符：字母打头的字母数字串，长度不超过10。常数：整数，所含数字的个数不超过14。返回单词形式：二元式（种别，属性值）。 二．实验平台。 PC机：Windows XP操作系统 编程环境：Turbo C 2.0 编程语言：C语言 三．实验算法说明。定义2个全局变量char ch和char strtoken[]，分别用来存放最新读入的源程序字符和构成单词符号的字符串。然后利用各个子函数的功能和读文件内容的操作，实现对源程序的词法分析，分别进行关键字、标识符、常数、运算符和界符的分析确认，然后将对应的二元式结果输出。首先用户输入要读文件的名称，然后程序会把文件的内容读到预先定义的足够大的字符数组str（相当于词法分析的一个缓冲区）里，然后开始从str里一个一个地读出字符，并放在专门用来暂时存放单词的数组strtoken里进行词法分析。若符合语言的规则，则打印出相关的说明信息，即该字符或字符串的二元式信息，从中可以清楚地看出该字符或字符串的种别和属性值。如果读入的字符或字符串是非法的，即不是语法规定的或是没有意义的字符或字符串，程序会提示相关的错误信息并进行一定的说明。 四．程序子函数功能说明。 1. void getChar( ) 将下一个输入字符读到ch中，搜索指示器前移一个字符的位置。 2. void getBc( ) 检查ch中的字符是否为空白，若是，则调用getChar( )，直到ch中进入一个非空白的字符。 3. void retract( ) 将搜索指示器回调一个字符位置，将ch置为空白字符。 4. void concat( ) 将ch中的字符连接到strtoken之后。 5. int Isletter( ) 子函数过程，判断ch中的字符是否为字母。 6. int Isdigit( ) 子函数过程，判断ch中的字符是否为数字。 7. int Iskey_ch( ) 子函数过程，判断strtoken中的字符串是否为程序设计语言的关键字，若是，打印并返回它的二元式。 8. void Isident( ) 子函数过程，判断strtoken中的字符串是否为程序设计语言规定的标识符。 9. void empty( ) 将strtoken字符数组置空，然后接收下一个字符并分析。 10. void Isnum( ) 子函数过程，判断strtoken中的字符串是否为数字串常量，若是，打印并返回它的二元式。 11. void error( ) 子函数过程，判断ch中的字符是否为非法字符，若是，则打印该非法字符并提示。 12. int Isjiefu( ) 子函数过程，判断strtoken中的字符串是否为程序设计语言语法规定的界符。 13. int Issuanfu( ) 子函数过程，判断strtoken中的字符串是否为程序设计语言语法规定的运算符号。 14. 
void readfile( ) 子函数过程，用户输入要分析的文件名，然后读文件操作，把文件内容读入到字符数组。五．程序源代码（C语言）： #include <stdio.h> #include <conio.h> #include <stdlib.h> typedef char CH1[15]; typedef char CH2[2]; typedef char NUM[25]; CH1 key_ch[15]={"begin","end","if","then","else","while","write","read", "do","call","const","var","procedure","repeat","until"}; CH1 Id[15]={''}; //定义数组，存放读出的标识符 NUM num[20]; //定义数组，存放读出的常数 CH2 key1[12]={"+","-","*","/",":=","=","<","<=",">",">=","<>","#"}; CH1 key1name[12]={"Add","Subtract","Multiple","Devide","Evaluate", "Equal","Smaller","Small_equal","Bigger","Bigger_equal", "Not_equal","Not_equal"}; char key2[5]={'，'，'.'，'；'，'（'，'）'}; CH1 key2name[5]={"Comma","Period","Semicolon","Lparenthesis", "Rparenthesis"}; char ch; int str_cur=0; //存放文件数组的指示器 int strt_cur=0; //存放用来分析数组的指示器 int id_pi=0; //标识符数组指示器 int num_pi=0; //常数数组指示器 char str[100]; //用来存放文件内容的数组 char strtoken[15]={''}; FILE *fp; char filename[10]; void getChar( ) { ch=str[str_cur]; str_cur++; } void getBc( ) { if (ch==' ') while(ch==' ') getChar( ); } void readtxt( ) { int i=0; FILE *fp; fp=fopen(filename,"r"); if(fp==NULL) //出错信息，如果文件不存在则提示错误 { printf("\nCan not open the file!\n"); getch(); exit(0); } while(!feof(fp)) str[i++]=fgetc(fp); //读文件内容并复制 fclose(fp); str[--i]='\0'; //结束文件内容 } void retract( ) { str_cur--; ch=' '; } void concat( ) { strtoken[strt_cur]=ch; strt_cur++; } int Isletter( ) { if(ch>='a'&&ch<='z'|| ch>='A'&&ch<='Z') return 1; return 0; } int Isdigit( ) { if(ch>='0'&&ch<='9') return 1; return 0; } int Iskey_ch( ) { int i; int flag=0; for(i=0;i<15;i++) if(strcmp(strtoken,key_ch[i])==0) { printf("Key_word: %s ($%s,--)\n\n",strtoken,strtoken); flag=1; break; } return flag; } void Isident( ) { strcpy(Id[id_pi],strtoken); printf("Identity: %s ($ID,%s)\n\n",Id[id_pi],Id[id_pi]); id_pi++; } void empty( ) //把用来分析文件内容的数组置空 { int k; for(k=0;k<15;k++) strtoken[k]=''; strt_cur=0; } void Isnum( ) //常数判断函数 { strcpy(num[num_pi],strtoken); printf("Const number: %s ($INT,%s)\n\n",num[num_pi], num[num_pi]); 
num_pi++; } void error( ) //非法字符错误提示函数 { printf("Illegal chars: %c\n\n",ch); } int Isjiefu( ) //界符判断函数 { int i，flag=0; for(i=0;i<5;i++) if(ch==key2[i]) { printf("Bound_Symbol: %c ($%s,--)\n\n",key2[i],key2name[i]); flag=1; break; } return flag; } int Issuanfu( ) //运算符判断函数 { int i; int flag=0; if(ch==':') { concat( ); getChar( ); if(ch=='=') { concat( ); printf("Operator: %s ($%s,--)\n\n",strtoken,key1name[4]); flag=1; return flag; } else printf("Illegal char: %s\n\n",strtoken); } else if(ch=='<') { concat( ); getChar( ); if(ch=='>') { concat( ); printf("Operator: %s ($%s,--)\n\n",strtoken,key1name[10]); flag=1; return flag; } else if(ch=='=') { concat( ); printf("Operator: %s ($%s,--)\n\n",strtoken,key1name[7]); flag=1; return flag; } else { retract( ); printf("Operator: %s ($%s,--)\n\n",strtoken,key1name[6]); flag=1; return flag; } } else if(ch=='>') { concat( ); getChar( ); if(ch=='=') { concat( ); printf("Operator: %s ($%s,--)\n\n",strtoken,key1name[9]); flag=1; return flag; } else { retract( ); printf("Operator: %s ($%s,--)\n\n",strtoken,key1name[8]); flag=1; return flag; } } else { concat( ); for(i=0;i<12;i++) { if(strcmp(strtoken,key1[i])==0) { printf("Operator: %s ($%s,--)\n\n",strtoken,key1name[i]); flag=1; break; } } return flag; } } main( ) { int a1,a2; clrscr( ); printf("\nPlease enter the name of the file: "); scanf("%s",filename); getchar( ); readfile( ); printf("\nThe source_programme codes are: \n\n%s\n\n",str); while(str_cur<=(strlen(str)-1)) { a1=0; a2=0; getChar( ); getBc( ); if( Isletter( ) ) { while( Isletter( ) ||Isdigit( ) ) { concat( ); getChar( ); } retract( ); if(strlen(strtoken)>10) printf("\nThe length of identity exceeds 10 !!!\n\n"); if(!Iskey_ch( )) Isident( ); empty( ); } else if( Isdigit( )) { while(Isdigit( )) { concat( ); getChar(); } retract( ); if(strlen(strtoken)>14) printf("\nThe length of number exceeds 14 !!!\n\n"); Isnum( ); empty( ); } else { a1=Isjiefu( ); if(!a1) { a2=Issuanfu( ); if(!a2) error( ); empty( ); } } } getch( 
); } 六．运行实例及结果： 1.读入文件weihua1.c进行分析： Please enter the name of the file: weihua1.c The source_programme codes are: begin end procedure until ab3s pointer2 12 132 , ; ) + / <= := Key_word: begin ($begin,--) Key_word: end ($end,--) Key_word: procedure ($procedure,--) Key_word: until ($until,--) Identity: ab3s ($ID,ab3s) Identity: pointer2 ($ID,pointer2) Const number: 12 ($INT,12) Const number: 132 ($INT,132) Bound_Symbol: , ($Comma,--) Bound_Symbol: ; ($Semicolon,--) Bound_Symbol: ) ($Rparenthesis,--) Operator: + ($Add,--) Operator: / ($Devide,--) Operator: <= ($Small_equal,--) Operator: := ($Evaluate,--) 2.读入文件weihua2.c进行分析： Please enter the name of the file: weihua2.c The source_programme codes are: if (a>=0) b=1; else b=2; Key_word: if ($if,--) Bound_Symbol: ( ($Lparenthesis,--) Identity: a ($ID,a) Operator: >= ($Bigger_equal,--) Const number: 0 ($INT,0) Bound_Symbol: ) ($Rparenthesis,--) Identity: b ($ID,b) Operator: = ($Equal,--) Const number: 1 ($INT,1) Bound_Symbol: ; ($Semicolon,--) Key_word: else ($else,--) Identity: b ($ID,b) Operator: = ($Equal,--) Const number: 2 ($INT,2) Bound_Symbol: ; ($Semicolon,--) 3.读入文件weihua3.c进行分析： Please enter the name of the file: weihua3.c The source_programme codes are: aaabbbcccdd 1234567890123456 $ % & The length of identity exceeds 10 !!! Identity: aaabbbcccdd ($ID,aaabbbcccdd) The length of number exceeds 14 !!! Const number: 1234567890123456 ($INT,1234567890123456) Illegal chars: $ Illegal chars: % Illegal chars: & 七．实验中遇到的问题及感想。在编写词法分析器的程序时，几个子函数的功能还是比较容易实现的，但是要把它们的功能连在一起，实现对程序源代码的词法分析就困难了。例如在分析运算符的时候，有的运算符由2个符号组成，有的由1个符号组成，在读入一个字符之后还要进行超前搜索，看看第二个字符是否可以和第一个字符组成一个运算符。实验过程中遇到问题，主要还是因为没有真正弄清楚分析的过程和一些应该考虑的问题；经过自己专心的研究和思考，最终完成了实验，实现了词法分析器的功能，对词法分析的整个过程、一些关键步骤以及出错信息的提示都有了清楚的认识和理解。经过这次编译原理词法分析器的实验，不但使我对C语言有了进一步的实践，获得了不少实践经验，更重要的是使我对编译原理中词法分析器的工作过程和原理有了进一步的深刻理解，相信在以后编译原理的学习过程中会有新的认识和体会。

展开阅读全文