(四)基於Flex設計實現C子集的詞法分析器
前期準備
為了寫出C子集的詞法分析器,首先應該瞭解C子集有哪些。
參考C 基本語法(菜鳥教程) 先進行總結:
數字
需要考慮到十進位制、十六進位制、八進位制、二進位制、正負數、小數、科學計數法以及一些數字字尾:u,l,f等(注意浮點數不能加uU)。(應該沒了吧?)
關鍵字:
auto、break、case、char、const、continue、default、do、double、else、enum、extern、float、for、goto、if、int、long、register、return、short、signed、sizeof、static、struct、switch、typedef、unsigned、union、void、volatile、while。
識別符號:
C 識別符號是用來標識變數、函式,或任何其他使用者自定義專案的名稱。一個識別符號以字母 A-Z 或 a-z 或下劃線 _ 開始,後跟零個或多個字母、下劃線和數字(0-9)。
運算子:
算數運算子:+、-、*、/、%、++、--
關係運算符:==、!=、>、<、>=、<=
邏輯運算子:&&、||、!
位運算子:&、|、^、~、<<、>>
賦值運算子:=、+=、-=、*=、/=、%=、<<=、>>=、&=、^=、|=
雜項運算子(如 sizeof、取址 &、解引用 *、條件運算 ? :)的含義需要結合語法上下文才能確定,我覺得單憑flex的詞法規則無法區分,因此這裡不單獨處理。
標點符號
三種括號:{}[]() 以及 :,;.-> " '
(應該沒了吧?)
flex程式碼實現
$ cat C-lexical-analyzer.l
%{
/*
 * Flex specification of a lexical analyzer for a subset of C.
 *
 * Every recognized lexeme is printed on its own line, prefixed with its
 * category: "Key Word:", "Number:", "Operator:", "Punctuation:" or "ID:".
 * Malformed numeric literals and characters that match no other rule are
 * reported instead of being silently dropped.
 */
#include <stdio.h>
%}

/* ---------------- Numeric literals ---------------- */
/* Decimal integer with an optional leading sign.
 * NOTE(review): because the sign is part of the literal, an input such as
 * "1-1" is scanned as the two numbers "1" and "-1" -- confirm this is the
 * intended behaviour. */
decimal         ([-+])?(0|[1-9][0-9]*)
/* Decimal with optional fraction and optional exponent. */
science         {decimal}(\.[0-9]+)?([Ee][-+]?[0-9]+)?
/* Hexadecimal, binary and octal integers. */
hexadecimal     0[xX][a-fA-F0-9]+
binary          0[bB][01]+
octal           0[0-7]+
/* Any of the above followed by an optional u/l/f style suffix. */
number          ({hexadecimal}|{binary}|{science}|{octal})(([uU]?[Ll]?)|([Ll]?[Uu]?)|([fF]?))

/* Floating-point values are always signed, so a u/U suffix on a literal
 * containing a decimal point is reported by a dedicated rule. */
floatexcption   {decimal}\.([0-9]+)?([Ee][-+]?[0-9]+)?[Uu]
/* Catch-all for digit-led tokens that are not valid numbers. */
excption        [0-9][0-9a-zA-Z\.]+

/* ---------------- Keywords ---------------- */
AUTO            auto
BREAK           break
CASE            case
CHAR            char
CONST           const
CONTINUE        continue
DEFAULT         default
DO              do
DOUBLE          double
ELSE            else
ENUM            enum
EXTERN          extern
FLOAT           float
FOR             for
GOTO            goto
IF              if
INT             int
LONG            long
REGISTER        register
RETURN          return
SHORT           short
SIGNED          signed
SIZEOF          sizeof
STATIC          static
STRUCT          struct
SWITCH          switch
TYPEDEF         typedef
UNSIGNED        unsigned
UNION           union
VOID            void
VOLATILE        volatile
WHILE           while

/* ---------------- Identifiers ---------------- */
identifier      [a-z_A-Z][a-z_A-Z0-9]*

/* ---------------- Other characters ---------------- */
whitespace      [ \t\n\r\f\v]+
errno           .

/* ---------------- Operators ---------------- */
/* Arithmetic operators */
ADD             \+
SUB             \-
MUL             \*
QUO             \/
REM             %
INC             \+\+
DEC             \-\-
/* Assignment operators */
ASSIGN          =
ADD_ASSIGN      \+=
SUB_ASSIGN      \-=
MUL_ASSIGN      \*=
QUO_ASSIGN      \/=
REM_ASSIGN      %=
AND_ASSIGN      \&=
OR_ASSIGN       \|=
XOR_ASSIGN      \^=
SHL_ASSIGN      <<=
SHR_ASSIGN      >>=
/* NOTE(review): "~=" is not a C operator; kept so that existing scanner
 * output does not change -- confirm whether it should be removed. */
AND_NOT_ASSIGN  ~=
/* Bitwise operators */
AND             &
OR              \|
XOR             \^
SHL             <<
SHR             >>
AND_NOT         ~
/* Logical operators */
LAND            &&
LOR             \|\|
NOT             \!
/* Relational operators */
EQL             ==
LSS             <
GTR             >
NEQ             !=
LEQ             <=
GEQ             >=

/* ---------------- Punctuation ---------------- */
LPAREN          \(
LBRACK          \[
LBRACE          \{
COMMA           ,
PERIOD          \.
RPAREN          \)
RBRACK          \]
RBRACE          \}
SEMICOLON       ;
COLON           :
POT             \->
DQUA            \"
SQUA            \'

%%
    /* Rule order matters: when two patterns match the same longest text,
       flex picks the one listed first. Keywords therefore precede the
       identifier rule and valid numbers precede the number-error rule.
       An action of "|" means "same action as the next rule". */

    /* Keywords */
{AUTO}            |
{BREAK}           |
{CASE}            |
{CHAR}            |
{CONST}           |
{CONTINUE}        |
{DEFAULT}         |
{DO}              |
{DOUBLE}          |
{ELSE}            |
{ENUM}            |
{EXTERN}          |
{FLOAT}           |
{FOR}             |
{GOTO}            |
{IF}              |
{INT}             |
{LONG}            |
{REGISTER}        |
{RETURN}          |
{SHORT}           |
{SIGNED}          |
{SIZEOF}          |
{STATIC}          |
{STRUCT}          |
{SWITCH}          |
{TYPEDEF}         |
{UNSIGNED}        |
{UNION}           |
{VOID}            |
{VOLATILE}        |
{WHILE}           { printf("Key Word:%s\n", yytext); }

    /* A float carrying a u/U suffix is reported before the general rules. */
{floatexcption}   { printf("Float Execption: %s\n", yytext); }

    /* Well-formed numbers */
{number}          { printf("Number:%s\n", yytext); }

    /* Any other digit-led token is a malformed number. */
{excption}        { printf("Number Execption:%s\n", yytext); }

    /* Whitespace is skipped. */
{whitespace}      { }

    /* Operators: arithmetic, logical, assignment, bitwise, relational */
{ADD}             |
{SUB}             |
{MUL}             |
{QUO}             |
{REM}             |
{INC}             |
{DEC}             |
{LAND}            |
{LOR}             |
{NOT}             |
{ASSIGN}          |
{ADD_ASSIGN}      |
{SUB_ASSIGN}      |
{MUL_ASSIGN}      |
{QUO_ASSIGN}      |
{REM_ASSIGN}      |
{AND_ASSIGN}      |
{OR_ASSIGN}       |
{XOR_ASSIGN}      |
{SHL_ASSIGN}      |
{SHR_ASSIGN}      |
{AND_NOT_ASSIGN}  |
{AND}             |
{OR}              |
{XOR}             |
{SHL}             |
{SHR}             |
{AND_NOT}         |
{EQL}             |
{LSS}             |
{GTR}             |
{NEQ}             |
{LEQ}             |
{GEQ}             { printf("Operator:%s\n", yytext); }

    /* Punctuation */
{LPAREN}          |
{LBRACK}          |
{LBRACE}          |
{COMMA}           |
{PERIOD}          |
{RPAREN}          |
{RBRACK}          |
{RBRACE}          |
{SEMICOLON}       |
{COLON}           |
{POT}             |
{DQUA}            |
{SQUA}            { printf("Punctuation:%s\n", yytext); }

    /* Identifiers come after keywords so keywords win on equal length. */
{identifier}      { printf("ID:%s\n", yytext); }

    /* Any single character not matched above. */
{errno}           { printf("Mystery character:%s\n", yytext); }

%%

/*
 * Program entry point: runs the scanner once over its input (flex reads
 * standard input unless yyin is reassigned) and exits with status 0.
 * Command-line arguments are accepted but not used.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;
    yylex();
    return 0;
}

/*
 * End-of-input hook called by the generated scanner. Returning 1 tells
 * the scanner that there is no further input to process.
 * (The original spelled this "yywarp", so flex never called it.)
 */
int yywrap(void)
{
    return 1;
}
測試效果:
$ ./start.sh   # 編譯指令碼
編譯完成,請手動執行C-lexical-analyzer
[jin1ming@ML C-Lex]$ ./C-lexical-analyzer
78.987e76f
Number:78.987e76f
adsda
ID:adsda
srg090
ID:srg090
_12
ID:_12
!@#$#
Operator:!
Mystery character:@
Mystery character:#
Mystery character:$
Mystery character:#
121qwqer
Number Execption:121qwqer
1.2.
Number Execption:1.2.
1.2uf
Number Execption:1.2uf
==
Operator:==
-
Operator:-
=
Operator:=
^
Operator:^
!
Operator:!
>>
Operator:>>
,.!
Punctuation:,
Punctuation:.
Operator:!