在上一篇文章中已经介绍了读文件的操作,那么这一篇文章中将会细致解释词法分析。
在源文件里解析出的单词流必须识别为保留字,标识符,常量,操作符和界符五大类
1.显然我们需要列举出全部的保留字,而与保留字相似的就是标识符。在C语言中,保留字都以小写字母开头,并且其中的字母只能是小写字母;而标识符的第一个字符必须是字母(大小写皆可)或下划线 '_',后面可以接大小写字母、数字和下划线 '_'。在我写的这个编译器中,标识符的长度不能超过100个字符,而常见的C编译器(如gcc)允许的标识符长度远远大于此。
2.对于常量,这里须要注意的是整型和浮点型常量。
3.运算符依照的是以下的表:
C语言运算符表
运算符依照优先级大小由上向下排列,在同一行的运算符具有同样优先级。第二行是全部的一元运算符。
| | |
() [] -> . | 括号(函数等),数组,两种结构成员访问 | |
! ~ ++ -- + - * & | 否定,按位否定,增量,减量,正负号, 间接,取地址 | |
* / % | 乘,除,取模 | |
+ - | 加,减 | |
<< >> | 左移,右移 | |
< <= >= > | 小于,小于等于,大于等于,大于 | |
== != | 等于,不等于 | |
& | 按位与 | |
^ | 按位异或 | |
| | 按位或 | |
&& | 逻辑与 | |
|| | 逻辑或 | |
? : | 条件 | |
= += -= *= /= %= &= ^= |= <<= >>= | 各种赋值 | |
, | 逗号(顺序) | |
4.界符:“;”“{}”,单引号,双引号
接下来我介绍的是对保留字的归类,为了查找方便,将保留字依照a-z的顺序排好,根据数组的下标定位,降低寻找的时间
/*
 * keyword.h
 *
 * Created on: Jun 12, 2014
 *
 * Table of C reserved words, bucketed by first character so that a lookup
 * only has to scan the handful of keywords sharing that character.
 *
 * Layout of keywords[]:
 *   index 0      -> words starting with '_'
 *   index 1..26  -> words starting with 'a'..'z'
 * Every bucket is terminated by the sentinel entry {"end"}; "end" itself is
 * therefore never reported as a keyword.
 */
#ifndef KEYWORD_H_
#define KEYWORD_H_

struct keyword {
    /* Points at a string literal, which must never be written through. */
    const char *keyName;
};

static struct keyword key__[] = {
    {"__int64"},
    {"end"}
};

static struct keyword key_A[] = {
    {"auto"},
    {"end"}
};

static struct keyword key_B[] = {
    {"break"},
    {"end"}
};

static struct keyword key_C[] = {
    {"case"},
    {"char"},
    {"const"},
    {"continue"},
    {"end"}
};

static struct keyword key_D[] = {
    {"default"},
    {"do"},
    {"double"},
    {"end"}
};

static struct keyword key_E[] = {
    {"else"},
    {"enum"},
    {"extern"},
    {"end"}
};

static struct keyword key_F[] = {
    {"float"},
    {"for"},
    {"end"}
};

static struct keyword key_G[] = {
    {"goto"},
    {"end"}
};

static struct keyword key_H[] = {
    {"end"}
};

static struct keyword key_I[] = {
    {"if"},
    {"int"},
    {"end"}
};

static struct keyword key_J[] = {
    {"end"}
};

static struct keyword key_K[] = {
    {"end"}
};

static struct keyword key_L[] = {
    {"long"},
    {"end"}
};

static struct keyword key_M[] = {
    {"end"}
};

static struct keyword key_N[] = {
    {"end"}
};

static struct keyword key_O[] = {
    {"end"}
};

static struct keyword key_P[] = {
    {"end"}
};

static struct keyword key_Q[] = {
    {"end"}
};

static struct keyword key_R[] = {
    {"register"},
    {"return"},
    {"end"}
};

static struct keyword key_S[] = {
    {"short"},
    {"signed"},
    {"sizeof"},
    {"static"},
    {"struct"},
    {"switch"},
    {"end"}
};

static struct keyword key_T[] = {
    {"typedef"},
    {"end"}
};

static struct keyword key_U[] = {
    {"union"},
    {"unsigned"},
    {"end"}
};

static struct keyword key_V[] = {
    {"void"},
    {"volatile"},
    {"end"}
};

static struct keyword key_W[] = {
    {"while"},
    {"end"}
};

static struct keyword key_X[] = {
    {"end"}
};

static struct keyword key_Y[] = {
    {"end"}
};

static struct keyword key_Z[] = {
    {"end"}
};

/* size is 27: one bucket for '_' followed by one per letter 'a'..'z' */
static struct keyword *keywords[] = {
    key__, key_A, key_B, key_C, key_D, key_E,
    key_F, key_G, key_H, key_I, key_J, key_K,
    key_L, key_M, key_N, key_O, key_P, key_Q,
    key_R, key_S, key_T, key_U, key_V, key_W,
    key_X, key_Y, key_Z
};

#endif /* KEYWORD_H_ */
以下是词法分析的源代码;
/*
 * lex.h
 *
 * Created on: Jun 13, 2014
 *
 * Character-classification helpers shared by the lexer.
 *
 * All helpers are function-like macros that evaluate their argument more
 * than once, so never pass an expression with side effects (for example
 * `*p++`).
 */
#ifndef LEX_H_
#define LEX_H_

#include "input.h"
#include "keyword.h"

/* Non-zero when c is an ASCII decimal digit '0'..'9'. */
#define isDigit(c)       ((c) >= '0' && (c) <= '9')

/* Non-zero when c is an ASCII upper-case letter 'A'..'Z'. */
#define isUpperLetter(c) ((c) >= 'A' && (c) <= 'Z')

/* Non-zero when c is an ASCII lower-case letter 'a'..'z'. */
#define isLowerLetter(c) ((c) >= 'a' && (c) <= 'z')

/* Non-zero when c is an ASCII letter of either case. */
#define isLetter(c)      (isUpperLetter(c) || isLowerLetter(c))

#endif /* LEX_H_ */
/*
 * lex.c
 *
 * Created on: Jun 13, 2014
 *
 * Lexical analyser: reads characters through source.cursor and prints one
 * classified token (keyword, identifier, number, operator or delimiter)
 * per call to getToken().
 */
#include "zcc.h"
#include "lex.h"

#define curr source.cursor

/* Capacity of the lexeme buffer, including the terminating NUL. */
#define LEXEME_MAX 100

/*
 * Operator spellings, ordered longest first so that the first entry that
 * matches is also the longest match ("<<=" wins over "<<" and "<").
 *
 * NOTE: "%d", "%c", "%f", "%ld", "%lf" and "\n" are not C operators. String
 * literals are not lexed as a unit, so these entries keep printf-style
 * format strings from falling apart into '%' and an identifier. As a side
 * effect, `a%d` with a variable named d is also reported as the operator
 * "%d".
 */
static const char *const operatorTable[] = {
    "<<=", ">>=", "%ld", "%lf",
    "<<", "<=", ">>", ">=", "==", "->", "--", "-=", "!=", "++", "+=",
    "*=", "&&", "&=", "/=", "%=", "%d", "%c", "%f", "^=", "||", "|=", "\\n",
    "<", ">", "=", "(", ")", "[", "]", "-", ".", "!", "~", "+", "*", "&",
    "/", "%", "^", "|", "?", ":", ",", "\\"
};

/*
 * Append c to lexeme unless the buffer is full. One slot is always kept
 * free for the terminating NUL, so over-long tokens are truncated instead
 * of overflowing the buffer.
 */
static void appendChar(char *lexeme, int *length, char c)
{
    if (*length < LEXEME_MAX - 1) {
        lexeme[(*length)++] = c;
    }
}

/*
 * Advance the cursor past blanks (' ', '\t', '\n', '\r'), "//" line comments
 * and slash-star block comments.
 *
 * Returns 0 when the cursor rests on the first character of a token and -1
 * when END_OF_FILE is reached first (including inside an unterminated block
 * comment).
 *
 * Assumes the input buffer is terminated by an END_OF_FILE character, since
 * one character of look-ahead is read after '/' and '*'.
 */
static int skipBlanksAndComments(void)
{
    for (;;) {
        if (*curr == END_OF_FILE) {
            return -1;
        }
        if (*curr == ' ' || *curr == '\t' || *curr == '\n' || *curr == '\r') {
            curr++;
        } else if (*curr == '/' && *(curr + 1) == '/') {
            /* Line comment: stop on the newline, the next pass skips it. */
            while (*curr != '\n' && *curr != END_OF_FILE) {
                curr++;
            }
        } else if (*curr == '/' && *(curr + 1) == '*') {
            curr += 2;
            while (*curr != END_OF_FILE
                   && !(*curr == '*' && *(curr + 1) == '/')) {
                curr++;
            }
            if (*curr == END_OF_FILE) {
                return -1;
            }
            curr += 2; /* step over the closing star-slash */
        } else {
            return 0;
        }
    }
}

/*
 * Return 1 when lexeme is listed in the keyword table, 0 otherwise.
 * Bucket 0 of keywords[] holds words starting with '_', buckets 1..26 hold
 * words starting with 'a'..'z'; each bucket ends with the sentinel "end".
 */
static int isKeyword(const char *lexeme)
{
    int bucket;
    int i;

    if (lexeme[0] == '_') {
        bucket = 0;
    } else if (isLowerLetter(lexeme[0])) {
        bucket = lexeme[0] - 'a' + 1;
    } else {
        return 0; /* keywords never start with an upper-case letter */
    }

    for (i = 0; strcmp(keywords[bucket][i].keyName, "end") != 0; i++) {
        if (strcmp(keywords[bucket][i].keyName, lexeme) == 0) {
            return 1;
        }
    }
    return 0;
}

/*
 * Try to read the longest operator starting at the cursor. On success the
 * operator is printed, the cursor is moved past it and 1 is returned;
 * otherwise nothing is consumed and 0 is returned.
 */
static int scanOperator(void)
{
    int count = (int)(sizeof operatorTable / sizeof operatorTable[0]);
    int t;

    for (t = 0; t < count; t++) {
        const char *op = operatorTable[t];
        int k = 0;

        /* Comparison stops at the first mismatch, so the look-ahead never
         * runs past the END_OF_FILE sentinel. */
        while (op[k] != '\0' && *(curr + k) == op[k]) {
            k++;
        }
        if (op[k] == '\0') {
            curr += k;
            printf("Operator is %s\n", op);
            return 1;
        }
    }
    return 0;
}

/*
 * Read the next token from the input, print its class and spelling, and
 * advance the cursor past it.
 *
 * Returns 1 when a token (or an unrecognised character) was consumed and
 * -1 when the end of the input was reached. Every call either consumes at
 * least one character or returns -1, so a caller looping until -1 always
 * terminates.
 */
int getToken(void)
{
    char lexeme[LEXEME_MAX];
    int length = 0;

    if (skipBlanksAndComments() < 0) {
        return -1;
    }

    /* Identifier or keyword: [A-Za-z_][A-Za-z0-9_]* */
    if (isLowerLetter(*curr) || isUpperLetter(*curr) || *curr == '_') {
        do {
            appendChar(lexeme, &length, *curr);
            curr++;
        } while (isDigit(*curr) || isUpperLetter(*curr)
                 || isLowerLetter(*curr) || *curr == '_');
        lexeme[length] = '\0';

        if (isKeyword(lexeme)) {
            printf("keyword is %s\n", lexeme);
        } else {
            printf("Identify is %s\n", lexeme);
        }
        return 1;
    }

    /* Integer constant, or floating constant when a '.' follows the digits. */
    if (isDigit(*curr)) {
        do {
            appendChar(lexeme, &length, *curr);
            curr++;
        } while (isDigit(*curr));

        if (*curr == '.') {
            do {
                appendChar(lexeme, &length, *curr);
                curr++;
            } while (isDigit(*curr));
            lexeme[length] = '\0';
            printf("float number is %s\n", lexeme);
        } else {
            lexeme[length] = '\0';
            printf("number is %s\n", lexeme);
        }
        return 1;
    }

    if (scanOperator()) {
        return 1;
    }

    /* Single-character delimiters. */
    if (*curr == '{' || *curr == '}' || *curr == ';'
        || *curr == '\'' || *curr == '\"') {
        appendChar(lexeme, &length, *curr);
        curr++;
        lexeme[length] = '\0';
        printf("Delimiter is %s\n", lexeme);
        return 1;
    }

    /* Anything else: report it and step over it so scanning can continue. */
    printf("Unknown character is %c\n", *curr);
    curr++;
    return 1;
}
这里实现了将单词分成五类流,并将单词打印出来,在后面的语法分析中将会使用到这里的单词流结果。
忘了说了,我将自己写的编译器命名为:ZCC,头文件都包括在zcc.h中(*^__^*) 嘻嘻……,想写个类似于gcc 一样奇妙的玩意。
最后看测试文档:
struct Student{ int a; char* name;}int main(){ int a=123; float a2=1.2345677; int b=1+3; for(int i=0; i < 100; i++) a+=i; printf("%d\n", a); return 0;}
测试结果:
keyword is structIdentify is StudentDelimiter is {keyword is intIdentify is aDelimiter is ;keyword is charOperator is *Identify is nameDelimiter is ;Delimiter is }keyword is intIdentify is mainOperator is (Operator is )Delimiter is {keyword is intIdentify is aOperator is =number is 123Delimiter is ;keyword is floatIdentify is a2Operator is =float number is 1.2345677Delimiter is ;keyword is intIdentify is bOperator is =number is 1Operator is +number is 3Delimiter is ;keyword is forOperator is (keyword is intIdentify is iOperator is =number is 0Delimiter is ;Identify is iOperator is
做到这里,能够告一小段落了,接下来做的事情就是语法分析。