博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
编译器DIY——词法分析
阅读量:4554 次
发布时间:2019-06-08

本文共 9412 字,大约阅读时间需要 31 分钟。

在上一篇文章中已经介绍了读文件的操作,那么这一篇文章中将会细致解释词法分析。

在源文件里解析出的单词流必须识别为保留字,标识符,常量,操作符和界符五大类

1.显然我们须要列举出全部的保留字,而与保留字相似的就是标识符。在C语言中,保留字都是以小写字母开头,并且当中的字母仅仅能是小写字母;而标识符的第一个字符则必须为字母(小写大写皆可),后面能够接大写和小写字母、数字和字符 ‘_’。在我写的这个编译器中,标识符的长度不能超过100个字符,而C语言中同意的标识符长度远远大于此。

2.对于常量,这里须要注意的是整型和浮点型常量。

3.运算符依照的是以下的表:

C语言运算符

运算符依照优先级大小由上向下排列,在同一行的运算符具有同样优先级。第二行是全部的一元运算符。

 

运算符
解释
结合方式
() [] -> . 括号(函数等),数组,两种结构成员访问
由左向右
! ~ ++ -- + - 

* &

否定,按位否定,增量,减量,正负号,

间接,取地址

由右向左
* / % 乘,除,取模
由左向右
+ - 加,减
由左向右
<< >> 左移,右移
由左向右
< <= >= > 小于,小于等于,大于等于,大于
由左向右
== != 等于,不等于
由左向右
& 按位与
由左向右
^ 按位异或
由左向右
| 按位或
由左向右
&& 逻辑与
由左向右
|| 逻辑或
由左向右
? : 条件
由右向左
= += -= *= /= %=

&= ^= |= <<= >>=

各种赋值
由右向左
, 逗号(顺序)
由左向右

4.界符:“;”“{}”,单引号,双引号

接下来我介绍的是对保留字的归类,为了查找方便,将保留字依照a-z的顺序排好,根据数组的下标定位,降低寻找的时间

/*
 * keyword.h
 *
 *  Created on: Jun 12, 2014
 *
 * Reserved-word tables for the lexer, bucketed by first character so that
 * a lookup only has to scan the words sharing the lexeme's first letter.
 *
 * Every bucket is terminated by the sentinel entry {"end"}; scanners must
 * stop when they reach it.  Buckets with no reserved words hold only the
 * sentinel.
 */
#ifndef KEYWORD_H_
#define KEYWORD_H_

/* One reserved word.  keyName points at a string literal, hence const. */
struct keyword {
	const char *keyName;
};

/* Words starting with '_' (bucket 0). */
static const struct keyword key__[] = {
	{"__int64"},
	{"end"}
};

static const struct keyword key_A[] = {
	{"auto"},
	{"end"}
};

static const struct keyword key_B[] = {
	{"break"},
	{"end"}
};

static const struct keyword key_C[] = {
	{"case"},
	{"char"},
	{"const"},
	{"continue"},
	{"end"}
};

static const struct keyword key_D[] = {
	{"default"},
	{"do"},
	{"double"},
	{"end"}
};

static const struct keyword key_E[] = {
	{"else"},
	{"enum"},
	{"extern"},
	{"end"}
};

static const struct keyword key_F[] = {
	{"float"},
	{"for"},
	{"end"}
};

static const struct keyword key_G[] = {
	{"goto"},
	{"end"}
};

static const struct keyword key_H[] = {
	{"end"}
};

static const struct keyword key_I[] = {
	{"if"},
	{"int"},
	{"end"}
};

static const struct keyword key_J[] = {
	{"end"}
};

static const struct keyword key_K[] = {
	{"end"}
};

static const struct keyword key_L[] = {
	{"long"},
	{"end"}
};

static const struct keyword key_M[] = {
	{"end"}
};

static const struct keyword key_N[] = {
	{"end"}
};

static const struct keyword key_O[] = {
	{"end"}
};

static const struct keyword key_P[] = {
	{"end"}
};

static const struct keyword key_Q[] = {
	{"end"}
};

static const struct keyword key_R[] = {
	{"register"},
	{"return"},
	{"end"}
};

static const struct keyword key_S[] = {
	{"short"},
	{"signed"},
	{"sizeof"},
	{"static"},
	{"struct"},
	{"switch"},
	{"end"}
};

static const struct keyword key_T[] = {
	{"typedef"},
	{"end"}
};

static const struct keyword key_U[] = {
	{"union"},
	{"unsigned"},
	{"end"}
};

static const struct keyword key_V[] = {
	{"void"},
	{"volatile"},
	{"end"}
};

static const struct keyword key_W[] = {
	{"while"},
	{"end"}
};

static const struct keyword key_X[] = {
	{"end"}
};

static const struct keyword key_Y[] = {
	{"end"}
};

static const struct keyword key_Z[] = {
	{"end"}
};

/*
 * Bucket index: 27 entries.  Index 0 is the '_' bucket; index
 * (letter - 'a' + 1) is the bucket for a lowercase first letter.
 */
static const struct keyword *const keywords[] = {
	key__, key_A, key_B, key_C, key_D, key_E,
	key_F, key_G, key_H, key_I, key_J, key_K,
	key_L, key_M, key_N, key_O, key_P, key_Q,
	key_R, key_S, key_T, key_U, key_V, key_W,
	key_X, key_Y, key_Z
};

#endif /* KEYWORD_H_ */

以下是词法分析的源代码;

/*
 * lex.h
 *
 *  Created on: Jun 13, 2014
 *
 * Character-classification helpers used by the lexer.
 */
#ifndef LEX_H_
#define LEX_H_

#include "input.h"
#include "keyword.h"

/*
 * Each macro evaluates its argument more than once, so do not pass an
 * expression with side effects (e.g. *p++).  Arguments and expansions are
 * fully parenthesized so they compose safely inside larger expressions.
 */
#define isDigit(c)			((c) >= '0' && (c) <= '9')
#define isUpperLetter(c)	((c) >= 'A' && (c) <= 'Z')
#define isLowerLetter(c)	((c) >= 'a' && (c) <= 'z')
/* True for any ASCII letter; forwards the argument to both helpers. */
#define isLetter(c)			(isUpperLetter(c) || isLowerLetter(c))

#endif /* LEX_H_ */

/*
 * lex.c
 *
 *  Created on: Jun 13, 2014
 *
 * Lexical analysis: splits the loaded source text into keywords,
 * identifiers, numeric constants, operators and delimiters, printing each
 * token as it is recognised.
 */
#include <stdio.h>
#include <string.h>

#include "zcc.h"
#include "lex.h"

/* Read position in the input; `source` is declared by the project headers. */
#define curr source.cursor

/* Capacity of the lexeme buffer, including the terminating NUL. */
enum { TOKEN_BUF_SIZE = 100 };

/*
 * Append the character under the cursor to buf and advance the cursor.
 * Once the buffer is full the character is still consumed but no longer
 * stored, so an over-long lexeme is truncated instead of overflowing buf.
 */
static void takeChar(char *buf, int *len)
{
	if (*len < TOKEN_BUF_SIZE - 1) {
		buf[(*len)++] = *curr;
	}
	curr++;
}

/*
 * If the character under the cursor is one of the characters in `set`,
 * consume it into buf.  Returns 1 when a character was taken, 0 otherwise.
 */
static int takeIfOneOf(char *buf, int *len, const char *set)
{
	/* strchr() also matches the terminator, so exclude '\0' explicitly. */
	if (*curr != '\0' && strchr(set, *curr) != NULL) {
		takeChar(buf, len);
		return 1;
	}
	return 0;
}

/* Terminate the lexeme, print it as "<kind> is <lexeme>" and return 1. */
static int emitToken(const char *kind, char *buf, int len)
{
	buf[len] = '\0';
	printf("%s is %s\n", kind, buf);
	return 1;
}

/*
 * Return 1 if `word` is listed in the keyword bucket selected by its first
 * character ('_' -> bucket 0, 'a'..'z' -> buckets 1..26), 0 otherwise.
 * Each bucket is scanned up to its "end" sentinel.
 */
static int isKeyword(const char *word)
{
	char first = word[0];
	int bucket;
	int i;

	if (first == '_') {
		bucket = 0;
	} else if (isLowerLetter(first)) {
		bucket = first - 'a' + 1;
	} else {
		return 0;
	}

	for (i = 0; strcmp(keywords[bucket][i].keyName, "end") != 0; i++) {
		if (strcmp(keywords[bucket][i].keyName, word) == 0) {
			return 1;
		}
	}
	return 0;
}

/*
 * Recognise the next token at the cursor, print it, and advance past it.
 *
 * Whitespace and comments (both the line and the block form) are skipped.
 * Returns 1 after printing a token and -1 when END_OF_FILE is reached.
 * A character that starts no known token is reported on stderr and
 * skipped, so the cursor always makes progress.
 */
int getToken()
{
	char a[TOKEN_BUF_SIZE];
	int a_length;
	char first;

	for (;;) {
		/* Skip blanks, newlines, tabs and carriage returns. */
		while (*curr == ' ' || *curr == '\n' || *curr == '\t' || *curr == '\r') {
			curr++;
		}
		/* Checked on every pass, including when no whitespace preceded it. */
		if (*curr == END_OF_FILE) {
			return -1;
		}

		a_length = 0;
		first = *curr;

		/* Identifier or keyword: [A-Za-z_][A-Za-z0-9_]* */
		if (isUpperLetter(first) || isLowerLetter(first) || first == '_') {
			do {
				takeChar(a, &a_length);
			} while (isDigit(*curr) || isUpperLetter(*curr)
					|| isLowerLetter(*curr) || *curr == '_');
			a[a_length] = '\0';
			return emitToken(isKeyword(a) ? "keyword" : "Identify", a, a_length);
		}

		/* Integer, or float when a '.' follows the integer part. */
		if (isDigit(first)) {
			do {
				takeChar(a, &a_length);
			} while (isDigit(*curr));
			if (*curr == '.') {
				do {
					takeChar(a, &a_length);
				} while (isDigit(*curr));
				return emitToken("float number", a, a_length);
			}
			return emitToken("number", a, a_length);
		}

		/* Everything below starts with one punctuation character. */
		takeChar(a, &a_length);

		switch (first) {
		case '<':
		case '>':
			/* <  <=  <<  <<=   and   >  >=  >>  >>= */
			if (*curr == first) {
				takeChar(a, &a_length);
			}
			takeIfOneOf(a, &a_length, "=");
			return emitToken("Operator", a, a_length);

		case '=':
		case '!':
		case '*':
		case '^':
			/* Optional trailing '=': ==  !=  *=  ^= */
			takeIfOneOf(a, &a_length, "=");
			return emitToken("Operator", a, a_length);

		case '+':
			takeIfOneOf(a, &a_length, "+=");
			return emitToken("Operator", a, a_length);

		case '-':
			takeIfOneOf(a, &a_length, ">-=");
			return emitToken("Operator", a, a_length);

		case '&':
			takeIfOneOf(a, &a_length, "&=");
			return emitToken("Operator", a, a_length);

		case '|':
			takeIfOneOf(a, &a_length, "|=");
			return emitToken("Operator", a, a_length);

		case '/':
			if (takeIfOneOf(a, &a_length, "=")) {
				return emitToken("Operator", a, a_length);
			}
			if (*curr == '/') {
				/* Line comment: discard up to, not including, the newline. */
				while (*curr != '\n') {
					if (*curr == END_OF_FILE) {
						return -1;
					}
					curr++;
				}
				break; /* resume scanning after the comment */
			}
			if (*curr == '*') {
				/* Block comment: discard through the closing star-slash. */
				curr++;
				while (*curr != END_OF_FILE) {
					if (*curr == '*' && *(curr + 1) == '/') {
						curr += 2;
						break;
					}
					curr++;
				}
				break; /* resume scanning; an unterminated comment ends at EOF */
			}
			return emitToken("Operator", a, a_length);

		case '%':
			/*
			 * NOTE(review): string literals are not lexed as a unit, so
			 * printf conversions such as %d, %c, %f and %l? are reported
			 * as operators.  That also means `x%d` is split as `x`, `%d`.
			 * Kept for output compatibility until strings are tokenised.
			 */
			if (!takeIfOneOf(a, &a_length, "dcf=") && *curr == 'l') {
				takeChar(a, &a_length);
				if (*curr != END_OF_FILE) {
					takeChar(a, &a_length);
				}
			}
			return emitToken("Operator", a, a_length);

		case '\\':
			/* Same string-literal caveat as '%': "\n" is reported as one operator. */
			takeIfOneOf(a, &a_length, "n");
			return emitToken("Operator", a, a_length);

		case '(':
		case ')':
		case '[':
		case ']':
		case '.':
		case '~':
		case '?':
		case ':':
		case ',':
			return emitToken("Operator", a, a_length);

		case '{':
		case '}':
		case ';':
		case '\'':
		case '\"':
			return emitToken("Delimiter", a, a_length);

		default:
			/* Already consumed above, so the scan cannot get stuck here. */
			fprintf(stderr, "Unknown character '%c' skipped\n", first);
			break;
		}
	}
}

这里实现了将单词分成五类流,并将单词打印出来,在后面的语法分析中将会使用到这里的单词流结果。

忘了说了,我将自己写的编译器命名为:ZCC,头文件都包括在zcc.h中(*^__^*) 嘻嘻……,想写个类似于gcc 一样奇妙的玩意。

最后看测试文档:

struct  Student{   int a;   char* name;}int main(){    int a=123;    float a2=1.2345677;    int b=1+3;    for(int i=0; i < 100; i++)    		a+=i;    printf("%d\n", a);    return 0;}

测试结果:

keyword is structIdentify is StudentDelimiter is {keyword is intIdentify is aDelimiter is ;keyword is charOperator is *Identify is nameDelimiter is ;Delimiter is }keyword is intIdentify is mainOperator is (Operator is )Delimiter is {keyword is intIdentify is aOperator is =number is 123Delimiter is ;keyword is floatIdentify is a2Operator is =float number is 1.2345677Delimiter is ;keyword is intIdentify is bOperator is =number is 1Operator is +number is 3Delimiter is ;keyword is forOperator is (keyword is intIdentify is iOperator is =number is 0Delimiter is ;Identify is iOperator is 

做到这里,能够告一小段落了,接下来做的事情就是语法分析。

转载于:https://www.cnblogs.com/mengfanrong/p/3843400.html

你可能感兴趣的文章
android 调用系统相机录像并保存
查看>>
BW系统表的命名规则
查看>>
Asp.Net在IE10下出现_doPostBack未定义的解决办法 LinkButton
查看>>
《CLR via C#》Part2之Chapter5 基元类型、引用类型和值类型(一)
查看>>
1-9 RHEL7-文件权限管理
查看>>
apache服务器安装
查看>>
Search a 2D Matrix
查看>>
文件解析漏洞
查看>>
弹性成像的一些术语
查看>>
作业2
查看>>
vim 笔记
查看>>
MySQL的基本使用命令
查看>>
output 参数在存储过程中的用法
查看>>
大数加法和乘法(高精度)
查看>>
利用SynchronizationContext.Current在线程间同步上下文
查看>>
python各种类型转换-int,str,char,float,ord,hex,oct等
查看>>
sublime Text3 快捷键
查看>>
19 年书单
查看>>
不变模式
查看>>
matlab去云雾
查看>>