资源描述
计算机科学与通信工程学院
编译原理实验报告
题目: 1.词法分析器
2. LL(1)分析器
3. LR(0)分析器
班级:
姓名:
学号:
指导老师:
2017年 月
目录
一、 实验题目 1
二、 实验目的和要求 1
三、 代码实现 2
四、 总结 27
一、 实验题目
1. 词法分析器
分析一段程序代码,将代码中的单词符号分解出来,并对其进行检查,输出token表和error表
2. LL(1)文法分析器
分析给定文法。求出文法的FIRST集,FOLLOW集,并构建分析表,对给定输入串进行分析。
3. LR(0)文法分析器
分析给定文法。用ε_CLOSURE方法构造文法的LR(0)项目集规范族,根据状态转换函数GO构造出文法的DFA,并转换为分析表,对给定输入串进行分析。
二、 实验目的和要求
1. 学会词法分析器的实现思路。
2. 学会求解FIRST集, FOLLOW集,构造LL(1)分析表。
3. 学会ε_CLOSURE方法, 状态转换函数GO, 构造LR(0)分析表。
三、 代码实现
1. 词法分析器
program.txt 中存放要分析的文法:
E->TR
R->+TR|-TR|~
T->FG
G->*FG|/FG|~
F->(E)|i
代码:
# Token classes recognized by the toy lexer.
KEYWORD_LIST = ['while', 'if', 'else', 'switch', 'case']
SEPARATOR_LIST = [';', ':', ',', '(', ')', '[', ']', '{', '}']
OPERATOR_LIST1 = ['+', '-', '*']  # single-character operators
OPERATOR_LIST2 = ['<=', '<', '==', '=', '>', '>=']
# Maps each lexeme to its {category: attribute} token entry.
CATEGORY_DICT = {
    # KEYWORD
    "while": {"while": ""},
    "if": {"if": ""},
    "else": {"else": ""},
    "switch": {"switch": ""},
    "case": {"case": ""},
    # OPERATOR
    "+": {"+": ""},
    "-": {"-": ""},
    "*": {"*": ""},
    "<=": {"relop": "LE"},
    "<": {"relop": "LT"},
    ">=": {"relop": "GE"},
    ">": {"relop": "GT"},
    "==": {"relop": "EQ"},
    "=": {"=": ""},
    # SEPARATOR
    ";": {";": ""},
    ":": {":": ""},
    ",": {",": ""},
    "(": {"(": ""},
    ")": {")": ""},
    "[": {"[": ""},  # fixed: entry previously read {"]": ""} (copy-paste bug)
    "]": {"]": ""},
    "{": {"{": ""},
    "}": {"}": ""},
}
# Output tables filled while scanning.
CONSTANTTABLE = []
TOKENTABLE = []
OPERATORTABLE = []
KEYWORDTABLE = []
SEPARATORTABLE = []
UNDEFINEDTABLE = []
# READ FILE
def read_file(path, method):
    """Read the whole file at `path` (open mode `method`), replace each
    newline with a space, and return the stripped text plus one trailing
    space (the scanner relies on that final blank as a sentinel).

    Exits the program when the file cannot be opened.

    NOTE(review): the published listing was garbled here (the `def` line was
    fused into the comment, and a bare "()" stood where file.close()
    belonged); reconstructed with a `with` block, which closes the file
    automatically.
    """
    temp_str = ""
    try:
        with open(path, method) as file:
            for line in file:
                temp_str += line.replace('\n', " ")
    except IOError as e:
        print(e)
        exit()
    return temp_str.strip() + " "
# GETBE
def getbe():
    """Skip leading blanks (via getchar) and clear the token buffer."""
    global token
    getchar()
    token = ""
# GETCHAR
def getchar():
    """Advance `location` past spaces, load and return the character there."""
    global character, location
    while all_string[location] == " ":
        location += 1
    character = all_string[location]
    return character
# LINK TOKEN
def concatenation():
    """Append the current character to the token buffer."""
    global token, character
    token += character
# IS NUMBER
def digit():
    """Return True when the current character is a decimal digit."""
    return '0' <= character <= '9'
# IS ALPHABET
def letter():
    """Return True when the current character is an ASCII letter."""
    return 'a' <= character <= 'z' or 'A' <= character <= 'Z'
# IS KEYWORD
def reserve():
    """Return the category entry when `token` is a keyword, else 0."""
    if token in KEYWORD_LIST:
        return CATEGORY_DICT[token]
    return 0
# RETRACT
def retract():
    """Clear the lookahead character.

    The location rollback is disabled in this version of the lab code
    (the decrement below is deliberately left commented out).
    """
    global location, character
    # location = location - 1
    character = ""
# MAIN FUNCTION: scan exactly one token starting at the current location.
def main():
    """Recognize one token (identifier/keyword, constant, operator,
    separator, or undefined symbol) and record it in the matching table.

    Operates entirely on the module globals token/character/location and
    the table lists; prints one classification line per token.
    """
    global token
    global character
    global location
    s = getchar()  # first non-blank character of the next token
    getbe()        # reset the token buffer
    if 'a' <= s <= 'z' or 'A' <= s <= 'Z':
        # Identifier or keyword: consume letters and digits greedily.
        while letter() or digit():
            concatenation()
            location = location + 1
            character = all_string[location]
        retract()
        c = reserve()
        if c == 0:
            TOKENTABLE.append(token)
            print("这是标识符:{'", token, "':'", TOKENTABLE.index(token), "'}")
        else:
            KEYWORDTABLE.append(token)
            print("这是保留字:", CATEGORY_DICT[token])
    elif '0' <= s <= '9':
        # Unsigned integer constant.
        while digit():
            concatenation()
            location = location + 1
            character = all_string[location]
        retract()
        CONSTANTTABLE.append(token)
        print("这是常数:{'", token, "':'", CONSTANTTABLE.index(token), "'}")
    elif s in OPERATOR_LIST1:
        # Single-character operator (+ - *).
        location = location + 1
        OPERATORTABLE.append(s)
        print("这是单操作符:", CATEGORY_DICT[s])
    elif s in OPERATOR_LIST2:
        # Candidate two-character relational operator: peek at the next char.
        location = location + 1
        character = all_string[location]
        if character == '=':
            OPERATORTABLE.append(s + character)
            print("这是双操作符:", CATEGORY_DICT[s + character])
            # NOTE(review): location is not advanced past the '=' here, so the
            # next call appears to rescan it — confirm against the original lab code.
        else:
            retract()
            location = location + 1
            # NOTE(review): retract() no longer decrements location (see its
            # commented-out line), so this extra advance looks like it skips one
            # input character after a lone < = > — confirm intended.
            OPERATORTABLE.append(s)
            print("这是单操作符:", CATEGORY_DICT[s])
    elif s in SEPARATOR_LIST:
        location = location + 1
        SEPARATORTABLE.append(s)
        print("这是分隔符:", CATEGORY_DICT[s])
    else:
        # Anything unrecognized is recorded in the error table.
        location += 1
        UNDEFINEDTABLE.append(s)
        print("error:undefined identity :'", s, "'")
if __name__ == '__main__':
    # Lexer driver: load the source text, scan one token per iteration
    # until the input is exhausted, then dump every table.
    character = ""
    token = ""
    location = 0
    all_string = read_file("program.txt", "r")
    while location + 1 < len(all_string):
        main()
    for label, table in (('KEYWORDTABLE', KEYWORDTABLE),
                         ('TOKENTABLE', TOKENTABLE),
                         ('CONSTANTTABLE', CONSTANTTABLE),
                         ('OPERATORTABLE', OPERATORTABLE),
                         ('SEPARATORTABLE', SEPARATORTABLE)):
        print(label + ':', table)
运行结果:
2. LL(1)分析器
grammar.txt 中存放要分析的文法:
E->TR
R->+TR|-TR|~
T->FG
G->*FG|/FG|~
F->(E)|i
输入串:
i+i*i
代码:
NonTermSet = set()   # non-terminal symbols
TermSet = set()      # terminal symbols
First = {}           # FIRST sets, keyed by symbol
Follow = {}          # FOLLOW sets, keyed by non-terminal
GramaDict = {}       # processed productions: lhs -> set of alternative rhs strings
Code = []            # raw production lines as read from the file
AnalysisList = {}    # LL(1) analysis table: AnalysisList[A][a] = "A->rhs"
StartSym = ""        # start symbol (left side of the first production)
EndSym = '#'         # end-of-input marker is '#'
Epsilon = "~"        # '~' stands in for the epsilon symbol
# Build the FIRST sets.
def getFirst():
    """Compute FIRST for every symbol by fixed-point iteration.

    Terminals start with FIRST = {themselves}; non-terminals accumulate
    symbols from the productions in GramaDict until a full pass adds
    nothing more.
    """
    global NonTermSet, TermSet, First, Follow, FirstA
    for X in NonTermSet:
        First[X] = set()  # non-terminal FIRST sets start empty
    for X in TermSet:
        First[X] = set(X)  # a terminal's FIRST set is itself
    Change = True
    while Change:  # stop once a full pass changes no FIRST set
        Change = False
        for X in NonTermSet:
            for Y in GramaDict[X]:
                k = 0
                Continue = True
                while Continue and k < len(Y):
                    if not First[Y[k]] - set(Epsilon) <= First[X]:  # something new to add
                        if Epsilon not in First[Y[k]] and Y[k] in NonTermSet and k > 0:
                            # Y1..Y(k-1) all derive ~ but Yk does not: stop scanning
                            Continue = False
                        else:
                            First[X] |= First[Y[k]] - set(Epsilon)
                            Change = True
                    if Epsilon not in First[Y[k]]:
                        Continue = False
                    k += 1
                if Continue:  # X->~ directly, or every Y1..Yk can derive ~
                    First[X] |= set(Epsilon)
                    # FirstA[Y] |= set(Epsilon)
# Build the FOLLOW sets.
def getFollow():
    """Compute FOLLOW for every non-terminal by fixed-point iteration."""
    global NonTermSet, TermSet, First, Follow, StartSym
    for A in NonTermSet:
        Follow[A] = set()
    Follow[StartSym].add(EndSym)  # rule 1: '#' belongs to FOLLOW(start symbol)
    Change = True
    while Change:  # stop once a full pass changes no FOLLOW set
        Change = False
        for X in NonTermSet:
            for Y in GramaDict[X]:
                for i in range(len(Y)):
                    if Y[i] in TermSet:
                        continue
                    Flag = True
                    for j in range(i + 1, len(Y)):  # the symbols following Y[i]
                        if not First[Y[j]] - set(Epsilon) <= Follow[Y[i]]:
                            Follow[Y[i]] |= First[Y[j]] - set(Epsilon)  # rule 2: FIRST(beta)\{~} into FOLLOW(B)
                            Change = True
                        if Epsilon not in First[Y[j]]:
                            Flag = False
                            break
                    if Flag:
                        if not Follow[X] <= Follow[Y[i]]:  # rule 3: beta =>* ~, add FOLLOW(A) into FOLLOW(B)
                            Follow[Y[i]] |= Follow[X]
                            Change = True
# Build the LL(1) analysis table.
def getAnalysisList():
    """Fill AnalysisList[A][a] with the production string to use for the
    pair (A, a), then print the finished table."""
    for nonX in NonTermSet:
        AnalysisList[nonX] = dict()
        row = AnalysisList[nonX]
        flag = True  # guards the epsilon entries so they are written only once per nonX
        for Y in GramaDict[nonX]:
            for term in TermSet:
                if term in First[Y[0]] and term in First[nonX]:
                    row[term] = nonX+'->'+Y
            if Epsilon in First[nonX] and flag:  # nonX can derive ~: use nonX->~ on FOLLOW(nonX)
                flag = False
                for tmp in Follow[nonX]:
                    row[tmp] = nonX+'->'+Epsilon
    print('分析表:')
    for nonX in NonTermSet:
        print(' ', nonX, AnalysisList[nonX])
# Look up the analysis table.
def findAnalysisList(non, ter):
    """Return the right-hand side of the production stored in M[non, ter].

    Raises KeyError when the table cell is empty, i.e. the input has a
    syntax error. (The original printed the error, fell through, and then
    crashed with an UnboundLocalError on `Y`, masking the real cause.)
    """
    try:
        tmp = AnalysisList[non][ter]
    except KeyError as e:
        print('find error ')  # M[A,a] empty -> syntax error in the input
        print(e)
        raise
    X, Y = tmp.split('->')
    return Y
# Row formatting for the trace table.
def display(show_list):
    """Print every entry left-aligned in a 25-character column, then end the line."""
    line = ''.join(' %-25s' % entry for entry in show_list)
    print(line)
# LL(1) table-driven analyzer.
def analyzer():
    """Run the LL(1) predictive parse over a user-supplied input string.

    Uses the module globals TermSet/NonTermSet/StartSym/EndSym/Epsilon and
    the table built by getAnalysisList(); prints a step-by-step trace and
    a success/failure verdict.
    """
    head = ["Stack", "StackTop", 'NowStr', "InputStr", "Action"]
    # inputStr = 'i+i*i' + EndSym
    inputStr = input("请输入表达式:") + EndSym
    print('分析过程:')
    display(head)
    stack = []
    location = 0
    stack.append(EndSym)
    stack.append(StartSym)
    stack_top = stack.pop()
    while stack_top != EndSym and location < len(inputStr):
        if stack_top in TermSet and inputStr[location] == stack_top:  # x = a != '#': match and advance
            mess = '匹配,弹出栈顶符号' + stack_top + '并读入输入串的下一符号' + inputStr[location + 1]
            display([stack, stack_top, inputStr[location], inputStr[location + 1: len(inputStr)], mess])
            location += 1
            stack_top = stack.pop()
        elif stack_top in NonTermSet and inputStr[location] in TermSet:  # x is a non-terminal A: consult M[A,a]
            result = findAnalysisList(stack_top, inputStr[location])
            if result == Epsilon:  # M[A,a] holds A->~: pop A, push nothing
                mess = '弹出栈顶符号' + stack_top + '因M[' + stack_top + ',' + inputStr[location] + ']中为' + stack_top
                mess = mess + '->~,故不压栈'
            else:  # push the production's right side onto the stack in reverse
                mess = '弹出栈顶符号' + stack_top + ',将M[' + stack_top + ',' + inputStr[
                    location] + ']中的' + stack_top + '->' + result + '的' + result
                mess = mess + '逆序压栈'
                tmp_list = []
                for char in result:
                    tmp_list.append(char)
                tmp_list.reverse()
                stack.extend(tmp_list)
            display([stack, stack_top, inputStr[location], inputStr[location + 1: len(inputStr)], mess])
            stack_top = stack.pop()
        else:
            break
    if stack_top == EndSym and inputStr[location] == EndSym:  # x = a = '#': accept, parser stops
        display([[], '#', '#', '', '匹配,分析成功'])
        print()
        print('************************')
        print('* Analysis Success *')
        print('************************')
    else:
        print('Analysis Error')
# Read the grammar file.
def readGrammar():
    """Read grammar.txt line by line (newlines stripped) into the global
    Code list and return it.

    Exits the program when the file cannot be opened. The original left a
    bare "()" where file.close() belonged, so the handle leaked; the
    `with` block closes it automatically.
    """
    try:
        with open('grammar.txt', 'r') as file:
            for line in file:
                Code.append(line.replace('\n', ""))
    except IOError as e:
        print(e)
        exit()
    return Code
# Initialization: read the grammar, derive the symbol sets, build the
# FIRST/FOLLOW sets and the analysis table, then start the analyzer.
def init():
    """Drive the whole LL(1) pipeline: grammar -> sets -> table -> parse."""
    global NonTermSet, TermSet, First, Follow, StartSym, Code
    Code = readGrammar()
    n = int(len(Code))
    print('产生式个数:', n)
    StartSym = Code[0][0]  # left side of the first production is the start symbol
    print("开始符号:", StartSym)
    print('产生式:G[', StartSym, ']:')
    for i in range(n):
        X, Y = Code[i].split('->')
        print(' ', Code[i])
        NonTermSet.add(X)
        Y = Y.split('|')  # the production's alternatives
        for Yi in Y:
            TermSet |= set(Yi)  # collect every RHS symbol; non-terminals removed below
        if X not in GramaDict:
            GramaDict[X] = set()
        GramaDict[X] |= set(Y)  # production set, keyed by left side
    TermSet -= NonTermSet
    print('非终结符:', NonTermSet)
    print('终结符:', TermSet)
    getFirst()
    getFollow()
    print("FIRST集:")
    for k in NonTermSet:
        print(' FIRST[', k, ']: ', First[k])
    print("FOLLOW集:")
    for k, v in Follow.items():
        print(' FOLLOW[', k, ']: ', v)
    TermSet -= set(Epsilon)  # epsilon is not a real input symbol
    TermSet |= set(EndSym)   # '#' terminates the input string
    getAnalysisList()
    analyzer()


# Entry point of the LL(1) program.
init()
运行结果:
3. LR(0)分析器
program.txt 中存放要分析的文法:
X->S
S->BB
B->aB
B->b
输入串:
abab
代码:
VN = []              # non-terminal symbols
VT = []              # terminal symbols
NFA = []             # NFA transition table
DFA = []             # DFA transition table
grammar = []         # raw productions as read from the file
doted_grammar = []   # dotted items: [item_string, visited_flag]
VN2Int = {}          # non-terminal -> index
VT2Int = {}          # terminal -> index
action = []          # ACTION table
goto = []            # GOTO table
DFA_node = []        # DFA state (item-set) list
status_stack = []    # state stack of the LR(0) driver
symbol_stack = []    # symbol stack of the LR(0) driver
now_state = ''       # state currently on top of the stack
input_ch = ''        # current input character
input_str = ''       # the input string being parsed
location = 0         # current position in the input string
now_step = 0         # current step number of the parse
# Read the grammar file.
def read_grammar():
    """Read productions into the global grammar list, one per line, with
    trailing newlines stripped.

    NOTE(review): the published listing was garbled here — open() had an
    empty filename argument and a stray "()" followed the loop. The
    filename is reconstructed from the report text, which states the
    grammar is stored in program.txt — confirm against the original code.
    """
    global grammar
    with open('program.txt', 'r') as file:
        for line in file:
            grammar.append(line.replace('\n', ""))
# Collect the terminals and non-terminals.
def find_term_non():
    """Populate VN/VT (and their index maps VN2Int/VT2Int) from the global
    grammar list; '#' is appended to VT as the end marker."""
    global grammar
    rhs_symbols = []
    for production in grammar:
        lhs, rhs = production.split('->')
        if lhs not in VN:
            VN2Int.update({lhs: len(VN)})
            VN.append(lhs)
        rhs_symbols.extend(rhs)
    for sym in rhs_symbols:
        if sym not in VN and sym not in VT:
            VT2Int.update({sym: len(VT)})
            VT.append(sym)
    VT2Int.update({'#': len(VT)})
    VT.append('#')
# Insert a dot at a given position of a production string.
def add_char2str(grammar_i, i):
    """Return grammar_i with a '.' inserted before index i."""
    return '%s.%s' % (grammar_i[:i], grammar_i[i:])
# Generate every dotted item from the grammar.
def add_dot():
    """Append [dotted_production, 'false'] to doted_grammar for every
    possible dot position in every production.

    For a production 'A->xyz' the dot runs from just after '->' (index 3)
    to the end of the right-hand side, i.e. len-2 positions. The original
    kept two never-used counters (j, and a hand-tracked index n used only
    to append via subscript); both are removed.
    """
    global doted_grammar
    for production in grammar:
        for k in range(len(production) - 2):
            doted_grammar.append([add_char2str(production, k + 3), 'false'])
# Show the dotted grammar.
def print_doted_grammar():
    """Print each dotted item with a 1-based ordinal."""
    print('----加点后的文法----')
    for ordinal, item in enumerate(doted_grammar, 1):
        print('%d.%s' % (ordinal, item[0]))
# Show the raw grammar.
def print_read_grammar():
    """Print each production with a 0-based ordinal in parentheses."""
    print('----读入的文法----')
    for ordinal, production in enumerate(grammar):
        print('(%d)%s' % (ordinal, production))
# Initialize the NFA matrix.
def init_NFA():
    """Append a len(doted_grammar) x len(doted_grammar) matrix of empty
    strings to NFA (one row/column per dotted item)."""
    global NFA
    size = len(doted_grammar)
    for _ in range(size):
        NFA.append([''] * size)
# Locate the dot marker.
def find_pos_point(one_grammar):
    """Return the index of '.' in the dotted item, or -1 when absent."""
    return one_grammar.index('.') if '.' in one_grammar else -1
# Does the item begin with `start` and carry the dot right after '->'?
def is_start(grammar_i, start):
    """True iff the item string grammar_i[0] starts with `start` and has
    '.' at index 3 (i.e. immediately after the '->')."""
    item = grammar_i[0]
    return item.find(start, 0, 1) + item.find('.', 3, 4) == 3
# Collect the items that begin with `start` and have the dot right after '->'.
def find_node(start, grammar_id):
    """Write the indices of every matching dotted item into grammar_id
    (starting at slot 0) and return the match count.

    Uses enumerate for the index: the original called
    doted_grammar.index(i), which returns the FIRST equal element — wrong
    whenever the grammar contains duplicate productions — and costs O(n)
    per match.
    """
    num = 0
    for idx, item in enumerate(doted_grammar):
        if is_start(item, start):
            grammar_id[num] = idx
            num += 1
    return num
# 构造NFA
def make_NFA():
global NFA
grammar_id = []
for i in range(len(doted_grammar)):
grammar_id.append('')
init_NFA()
i = 0
for grammar_i in doted_grammar:
pos_point = find_pos_point(grammar_i[0]) # 找到点的位置
if not pos_point + 1 == len(grammar_i[0])
展开阅读全文