资源描述
计算机科学与通信工程学院
编译原理实验报告
题目: 1.词法分析器
2. LL(1)分析器
3. LR(0)分析器
班级:
姓名:
学号:
指导老师:
2017年 月
目录
一、 实验题目 1
二、 实验目的和要求 1
三、 代码实现 2
四、 总结 27
一、 实验题目
1. 词法分析器
分析一段程序代码,将代码中的单词符号分解出来,并对其进行检查,输出token表和error表
2. LL(1)文法分析器
分析给定文法。求出文法的FIRST集,FOLLOW集,并构建分析表,对给定输入串进行分析。
3. LR(0)文法分析器
分析给定文法。用ε_CLOSURE方法构造文法的LR(0)项目集规范族,根据状态转换函数GO构造出文法的DFA,并转换为分析表,对给定输入串进行分析。
二、 实验目的和要求
1. 学会词法分析器的实现思路。
2. 学会求解FIRST集, FOLLOW集,构造LL(1)分析表。
3. 学会ε_CLOSURE方法, 状态转换函数GO, 构造LR(0)分析表。
三、 代码实现
1. 词法分析器
program.txt 中存放要分析的文法:
E->TR
R->+TR|-TR|~
T->FG
G->*FG|/FG|~
F->(E)|i
代码:
# Token classes recognized by the toy lexer.
KEYWORD_LIST = ['while', 'if', 'else', 'switch', 'case']
SEPARATOR_LIST = [';', ':', ',', '(', ')', '[', ']', '{', '}']
OPERATOR_LIST1 = ['+', '-', '*']  # single-character operators
OPERATOR_LIST2 = ['<=', '<', '==', '=', '>', '>=']
# Maps each lexeme to its {category: attribute} token entry.
CATEGORY_DICT = {
    # KEYWORD
    "while": {"while": ""},
    "if": {"if": ""},
    "else": {"else": ""},
    "switch": {"switch": ""},
    "case": {"case": ""},
    # OPERATOR
    "+": {"+": ""},
    "-": {"-": ""},
    "*": {"*": ""},
    "<=": {"relop": "LE"},
    "<": {"relop": "LT"},
    ">=": {"relop": "GE"},
    ">": {"relop": "GT"},
    "==": {"relop": "EQ"},
    "=": {"=": ""},
    # SEPARATOR
    ";": {";": ""},
    ":": {":": ""},
    ",": {",": ""},
    "(": {"(": ""},
    ")": {")": ""},
    "[": {"[": ""},  # fixed: entry previously read {"]": ""} (copy-paste bug)
    "]": {"]": ""},
    "{": {"{": ""},
    "}": {"}": ""},
}
# Output tables filled while scanning.
CONSTANTTABLE = []
TOKENTABLE = []
OPERATORTABLE = []
KEYWORDTABLE = []
SEPARATORTABLE = []
UNDEFINEDTABLE = []
# READ FILE
def read_file(path, method):
    """Read the whole file at `path` (open mode `method`), replace each
    newline with a space, and return the stripped text plus one trailing
    space (the scanner relies on that final blank as a sentinel).

    Exits the program when the file cannot be opened.

    NOTE(review): the published listing was garbled here (the `def` line was
    fused into the comment, and a bare "()" stood where file.close()
    belonged); reconstructed with a `with` block, which closes the file
    automatically.
    """
    temp_str = ""
    try:
        with open(path, method) as file:
            for line in file:
                temp_str += line.replace('\n', " ")
    except IOError as e:
        print(e)
        exit()
    return temp_str.strip() + " "
# GETBE
def getbe():
    """Skip leading blanks (via getchar) and clear the token buffer."""
    global token
    getchar()
    token = ""
# GETCHAR
def getchar():
    """Advance `location` past spaces, load and return the character there."""
    global character, location
    while all_string[location] == " ":
        location += 1
    character = all_string[location]
    return character
# LINK TOKEN
def concatenation():
    """Append the current character to the token buffer."""
    global token, character
    token += character
# IS NUMBER
def digit():
    """Return True when the current character is a decimal digit."""
    return '0' <= character <= '9'
# IS ALPHABET
def letter():
    """Return True when the current character is an ASCII letter."""
    return 'a' <= character <= 'z' or 'A' <= character <= 'Z'
# IS KEYWORD
def reserve():
    """Return the category entry when `token` is a keyword, else 0."""
    if token in KEYWORD_LIST:
        return CATEGORY_DICT[token]
    return 0
# RETRACT
def retract():
    """Clear the lookahead character.

    The location rollback is disabled in this version of the lab code
    (the decrement below is deliberately left commented out).
    """
    global location, character
    # location = location - 1
    character = ""
# MAIN FUNCTION: scan exactly one token starting at the current location.
def main():
    """Recognize one token (identifier/keyword, constant, operator,
    separator, or undefined symbol) and record it in the matching table.

    Operates entirely on the module globals token/character/location and
    the table lists; prints one classification line per token.
    """
    global token
    global character
    global location
    s = getchar()  # first non-blank character of the next token
    getbe()        # reset the token buffer
    if 'a' <= s <= 'z' or 'A' <= s <= 'Z':
        # Identifier or keyword: consume letters and digits greedily.
        while letter() or digit():
            concatenation()
            location = location + 1
            character = all_string[location]
        retract()
        c = reserve()
        if c == 0:
            TOKENTABLE.append(token)
            print("这是标识符:{'", token, "':'", TOKENTABLE.index(token), "'}")
        else:
            KEYWORDTABLE.append(token)
            print("这是保留字:", CATEGORY_DICT[token])
    elif '0' <= s <= '9':
        # Unsigned integer constant.
        while digit():
            concatenation()
            location = location + 1
            character = all_string[location]
        retract()
        CONSTANTTABLE.append(token)
        print("这是常数:{'", token, "':'", CONSTANTTABLE.index(token), "'}")
    elif s in OPERATOR_LIST1:
        # Single-character operator (+ - *).
        location = location + 1
        OPERATORTABLE.append(s)
        print("这是单操作符:", CATEGORY_DICT[s])
    elif s in OPERATOR_LIST2:
        # Candidate two-character relational operator: peek at the next char.
        location = location + 1
        character = all_string[location]
        if character == '=':
            OPERATORTABLE.append(s + character)
            print("这是双操作符:", CATEGORY_DICT[s + character])
            # NOTE(review): location is not advanced past the '=' here, so the
            # next call appears to rescan it — confirm against the original lab code.
        else:
            retract()
            location = location + 1
            # NOTE(review): retract() no longer decrements location (see its
            # commented-out line), so this extra advance looks like it skips one
            # input character after a lone < = > — confirm intended.
            OPERATORTABLE.append(s)
            print("这是单操作符:", CATEGORY_DICT[s])
    elif s in SEPARATOR_LIST:
        location = location + 1
        SEPARATORTABLE.append(s)
        print("这是分隔符:", CATEGORY_DICT[s])
    else:
        # Anything unrecognized is recorded in the error table.
        location += 1
        UNDEFINEDTABLE.append(s)
        print("error:undefined identity :'", s, "'")
if __name__ == '__main__':
    # Lexer driver: load the source text, scan one token per iteration
    # until the input is exhausted, then dump every table.
    character = ""
    token = ""
    location = 0
    all_string = read_file("program.txt", "r")
    while location + 1 < len(all_string):
        main()
    for label, table in (('KEYWORDTABLE', KEYWORDTABLE),
                         ('TOKENTABLE', TOKENTABLE),
                         ('CONSTANTTABLE', CONSTANTTABLE),
                         ('OPERATORTABLE', OPERATORTABLE),
                         ('SEPARATORTABLE', SEPARATORTABLE)):
        print(label + ':', table)
运行结果:
2. LL(1)分析器
grammar.txt 中存放要分析的文法:
E->TR
R->+TR|-TR|~
T->FG
G->*FG|/FG|~
F->(E)|i
输入串:
i+i*i
代码:
NonTermSet = set()   # non-terminal symbols
TermSet = set()      # terminal symbols
First = {}           # FIRST sets, keyed by symbol
Follow = {}          # FOLLOW sets, keyed by non-terminal
GramaDict = {}       # processed productions: lhs -> set of alternative rhs strings
Code = []            # raw production lines as read from the file
AnalysisList = {}    # LL(1) analysis table: AnalysisList[A][a] = "A->rhs"
StartSym = ""        # start symbol (left side of the first production)
EndSym = '#'         # end-of-input marker is '#'
Epsilon = "~"        # '~' stands in for the epsilon symbol
# Build the FIRST sets.
def getFirst():
    """Compute FIRST for every symbol by fixed-point iteration.

    Terminals start with FIRST = {themselves}; non-terminals accumulate
    symbols from the productions in GramaDict until a full pass adds
    nothing more.
    """
    global NonTermSet, TermSet, First, Follow, FirstA
    for X in NonTermSet:
        First[X] = set()  # non-terminal FIRST sets start empty
    for X in TermSet:
        First[X] = set(X)  # a terminal's FIRST set is itself
    Change = True
    while Change:  # stop once a full pass changes no FIRST set
        Change = False
        for X in NonTermSet:
            for Y in GramaDict[X]:
                k = 0
                Continue = True
                while Continue and k < len(Y):
                    if not First[Y[k]] - set(Epsilon) <= First[X]:  # something new to add
                        if Epsilon not in First[Y[k]] and Y[k] in NonTermSet and k > 0:
                            # Y1..Y(k-1) all derive ~ but Yk does not: stop scanning
                            Continue = False
                        else:
                            First[X] |= First[Y[k]] - set(Epsilon)
                            Change = True
                    if Epsilon not in First[Y[k]]:
                        Continue = False
                    k += 1
                if Continue:  # X->~ directly, or every Y1..Yk can derive ~
                    First[X] |= set(Epsilon)
                    # FirstA[Y] |= set(Epsilon)
# Build the FOLLOW sets.
def getFollow():
    """Compute FOLLOW for every non-terminal by fixed-point iteration."""
    global NonTermSet, TermSet, First, Follow, StartSym
    for A in NonTermSet:
        Follow[A] = set()
    Follow[StartSym].add(EndSym)  # rule 1: '#' belongs to FOLLOW(start symbol)
    Change = True
    while Change:  # stop once a full pass changes no FOLLOW set
        Change = False
        for X in NonTermSet:
            for Y in GramaDict[X]:
                for i in range(len(Y)):
                    if Y[i] in TermSet:
                        continue
                    Flag = True
                    for j in range(i + 1, len(Y)):  # the symbols following Y[i]
                        if not First[Y[j]] - set(Epsilon) <= Follow[Y[i]]:
                            Follow[Y[i]] |= First[Y[j]] - set(Epsilon)  # rule 2: FIRST(beta)\{~} into FOLLOW(B)
                            Change = True
                        if Epsilon not in First[Y[j]]:
                            Flag = False
                            break
                    if Flag:
                        if not Follow[X] <= Follow[Y[i]]:  # rule 3: beta =>* ~, add FOLLOW(A) into FOLLOW(B)
                            Follow[Y[i]] |= Follow[X]
                            Change = True
# Build the LL(1) analysis table.
def getAnalysisList():
    """Fill AnalysisList[A][a] with the production string to use for the
    pair (A, a), then print the finished table."""
    for nonX in NonTermSet:
        AnalysisList[nonX] = dict()
        row = AnalysisList[nonX]
        flag = True  # guards the epsilon entries so they are written only once per nonX
        for Y in GramaDict[nonX]:
            for term in TermSet:
                if term in First[Y[0]] and term in First[nonX]:
                    row[term] = nonX+'->'+Y
            if Epsilon in First[nonX] and flag:  # nonX can derive ~: use nonX->~ on FOLLOW(nonX)
                flag = False
                for tmp in Follow[nonX]:
                    row[tmp] = nonX+'->'+Epsilon
    print('分析表:')
    for nonX in NonTermSet:
        print(' ', nonX, AnalysisList[nonX])
# Look up the analysis table.
def findAnalysisList(non, ter):
    """Return the right-hand side of the production stored in M[non, ter].

    Raises KeyError when the table cell is empty, i.e. the input has a
    syntax error. (The original printed the error, fell through, and then
    crashed with an UnboundLocalError on `Y`, masking the real cause.)
    """
    try:
        tmp = AnalysisList[non][ter]
    except KeyError as e:
        print('find error ')  # M[A,a] empty -> syntax error in the input
        print(e)
        raise
    X, Y = tmp.split('->')
    return Y
# Row formatting for the trace table.
def display(show_list):
    """Print every entry left-aligned in a 25-character column, then end the line."""
    line = ''.join(' %-25s' % entry for entry in show_list)
    print(line)
# LL(1) table-driven analyzer.
def analyzer():
    """Run the LL(1) predictive parse over a user-supplied input string.

    Uses the module globals TermSet/NonTermSet/StartSym/EndSym/Epsilon and
    the table built by getAnalysisList(); prints a step-by-step trace and
    a success/failure verdict.
    """
    head = ["Stack", "StackTop", 'NowStr', "InputStr", "Action"]
    # inputStr = 'i+i*i' + EndSym
    inputStr = input("请输入表达式:") + EndSym
    print('分析过程:')
    display(head)
    stack = []
    location = 0
    stack.append(EndSym)
    stack.append(StartSym)
    stack_top = stack.pop()
    while stack_top != EndSym and location < len(inputStr):
        if stack_top in TermSet and inputStr[location] == stack_top:  # x = a != '#': match and advance
            mess = '匹配,弹出栈顶符号' + stack_top + '并读入输入串的下一符号' + inputStr[location + 1]
            display([stack, stack_top, inputStr[location], inputStr[location + 1: len(inputStr)], mess])
            location += 1
            stack_top = stack.pop()
        elif stack_top in NonTermSet and inputStr[location] in TermSet:  # x is a non-terminal A: consult M[A,a]
            result = findAnalysisList(stack_top, inputStr[location])
            if result == Epsilon:  # M[A,a] holds A->~: pop A, push nothing
                mess = '弹出栈顶符号' + stack_top + '因M[' + stack_top + ',' + inputStr[location] + ']中为' + stack_top
                mess = mess + '->~,故不压栈'
            else:  # push the production's right side onto the stack in reverse
                mess = '弹出栈顶符号' + stack_top + ',将M[' + stack_top + ',' + inputStr[
                    location] + ']中的' + stack_top + '->' + result + '的' + result
                mess = mess + '逆序压栈'
                tmp_list = []
                for char in result:
                    tmp_list.append(char)
                tmp_list.reverse()
                stack.extend(tmp_list)
            display([stack, stack_top, inputStr[location], inputStr[location + 1: len(inputStr)], mess])
            stack_top = stack.pop()
        else:
            break
    if stack_top == EndSym and inputStr[location] == EndSym:  # x = a = '#': accept, parser stops
        display([[], '#', '#', '', '匹配,分析成功'])
        print()
        print('************************')
        print('* Analysis Success *')
        print('************************')
    else:
        print('Analysis Error')
# Read the grammar file.
def readGrammar():
    """Read grammar.txt line by line (newlines stripped) into the global
    Code list and return it.

    Exits the program when the file cannot be opened. The original left a
    bare "()" where file.close() belonged, so the handle leaked; the
    `with` block closes it automatically.
    """
    try:
        with open('grammar.txt', 'r') as file:
            for line in file:
                Code.append(line.replace('\n', ""))
    except IOError as e:
        print(e)
        exit()
    return Code
# Initialization: read the grammar, derive the symbol sets, build the
# FIRST/FOLLOW sets and the analysis table, then start the analyzer.
def init():
    """Drive the whole LL(1) pipeline: grammar -> sets -> table -> parse."""
    global NonTermSet, TermSet, First, Follow, StartSym, Code
    Code = readGrammar()
    n = int(len(Code))
    print('产生式个数:', n)
    StartSym = Code[0][0]  # left side of the first production is the start symbol
    print("开始符号:", StartSym)
    print('产生式:G[', StartSym, ']:')
    for i in range(n):
        X, Y = Code[i].split('->')
        print(' ', Code[i])
        NonTermSet.add(X)
        Y = Y.split('|')  # the production's alternatives
        for Yi in Y:
            TermSet |= set(Yi)  # collect every RHS symbol; non-terminals removed below
        if X not in GramaDict:
            GramaDict[X] = set()
        GramaDict[X] |= set(Y)  # production set, keyed by left side
    TermSet -= NonTermSet
    print('非终结符:', NonTermSet)
    print('终结符:', TermSet)
    getFirst()
    getFollow()
    print("FIRST集:")
    for k in NonTermSet:
        print(' FIRST[', k, ']: ', First[k])
    print("FOLLOW集:")
    for k, v in Follow.items():
        print(' FOLLOW[', k, ']: ', v)
    TermSet -= set(Epsilon)  # epsilon is not a real input symbol
    TermSet |= set(EndSym)   # '#' terminates the input string
    getAnalysisList()
    analyzer()


# Entry point of the LL(1) program.
init()
运行结果:
3. LR(0)分析器
program.txt 中存放要分析的文法:
X->S
S->BB
B->aB
B->b
输入串:
abab
代码:
VN = []              # non-terminal symbols
VT = []              # terminal symbols
NFA = []             # NFA transition table
DFA = []             # DFA transition table
grammar = []         # raw productions as read from the file
doted_grammar = []   # dotted items: [item_string, visited_flag]
VN2Int = {}          # non-terminal -> index
VT2Int = {}          # terminal -> index
action = []          # ACTION table
goto = []            # GOTO table
DFA_node = []        # DFA state (item-set) list
status_stack = []    # state stack of the LR(0) driver
symbol_stack = []    # symbol stack of the LR(0) driver
now_state = ''       # state currently on top of the stack
input_ch = ''        # current input character
input_str = ''       # the input string being parsed
location = 0         # current position in the input string
now_step = 0         # current step number of the parse
# Read the grammar file.
def read_grammar():
    """Read productions into the global grammar list, one per line, with
    trailing newlines stripped.

    NOTE(review): the published listing was garbled here — open() had an
    empty filename argument and a stray "()" followed the loop. The
    filename is reconstructed from the report text, which states the
    grammar is stored in program.txt — confirm against the original code.
    """
    global grammar
    with open('program.txt', 'r') as file:
        for line in file:
            grammar.append(line.replace('\n', ""))
# Collect the terminals and non-terminals.
def find_term_non():
    """Populate VN/VT (and their index maps VN2Int/VT2Int) from the global
    grammar list; '#' is appended to VT as the end marker."""
    global grammar
    rhs_symbols = []
    for production in grammar:
        lhs, rhs = production.split('->')
        if lhs not in VN:
            VN2Int.update({lhs: len(VN)})
            VN.append(lhs)
        rhs_symbols.extend(rhs)
    for sym in rhs_symbols:
        if sym not in VN and sym not in VT:
            VT2Int.update({sym: len(VT)})
            VT.append(sym)
    VT2Int.update({'#': len(VT)})
    VT.append('#')
# Insert a dot at a given position of a production string.
def add_char2str(grammar_i, i):
    """Return grammar_i with a '.' inserted before index i."""
    return '%s.%s' % (grammar_i[:i], grammar_i[i:])
# Generate every dotted item from the grammar.
def add_dot():
    """Append [dotted_production, 'false'] to doted_grammar for every
    possible dot position in every production.

    For a production 'A->xyz' the dot runs from just after '->' (index 3)
    to the end of the right-hand side, i.e. len-2 positions. The original
    kept two never-used counters (j, and a hand-tracked index n used only
    to append via subscript); both are removed.
    """
    global doted_grammar
    for production in grammar:
        for k in range(len(production) - 2):
            doted_grammar.append([add_char2str(production, k + 3), 'false'])
# Show the dotted grammar.
def print_doted_grammar():
    """Print each dotted item with a 1-based ordinal."""
    print('----加点后的文法----')
    for ordinal, item in enumerate(doted_grammar, 1):
        print('%d.%s' % (ordinal, item[0]))
# Show the raw grammar.
def print_read_grammar():
    """Print each production with a 0-based ordinal in parentheses."""
    print('----读入的文法----')
    for ordinal, production in enumerate(grammar):
        print('(%d)%s' % (ordinal, production))
# Initialize the NFA matrix.
def init_NFA():
    """Append a len(doted_grammar) x len(doted_grammar) matrix of empty
    strings to NFA (one row/column per dotted item)."""
    global NFA
    size = len(doted_grammar)
    for _ in range(size):
        NFA.append([''] * size)
# Locate the dot marker.
def find_pos_point(one_grammar):
    """Return the index of '.' in the dotted item, or -1 when absent."""
    return one_grammar.index('.') if '.' in one_grammar else -1
# Does the item begin with `start` and carry the dot right after '->'?
def is_start(grammar_i, start):
    """True iff the item string grammar_i[0] starts with `start` and has
    '.' at index 3 (i.e. immediately after the '->')."""
    item = grammar_i[0]
    return item.find(start, 0, 1) + item.find('.', 3, 4) == 3
# Collect the items that begin with `start` and have the dot right after '->'.
def find_node(start, grammar_id):
    """Write the indices of every matching dotted item into grammar_id
    (starting at slot 0) and return the match count.

    Uses enumerate for the index: the original called
    doted_grammar.index(i), which returns the FIRST equal element — wrong
    whenever the grammar contains duplicate productions — and costs O(n)
    per match.
    """
    num = 0
    for idx, item in enumerate(doted_grammar):
        if is_start(item, start):
            grammar_id[num] = idx
            num += 1
    return num
# 构造NFA
def make_NFA():
global NFA
grammar_id = []
for i in range(len(doted_grammar)):
grammar_id.append('')
init_NFA()
i = 0
for grammar_i in doted_grammar:
pos_point = find_pos_point(grammar_i[0]) # 找到点的位置
if not pos_point + 1 == len(grammar_i[0])
展开阅读全文