用YACCLEX
设计
领导形象设计圆作业设计ao工艺污水处理厂设计附属工程施工组织设计清扫机器人结构设计
计算机语言
用YACC/LEX 设计计算机语言
前言:YACC (Yet Another Compiler Compiler)是1974年在Unix 下设计出来的一个优秀的计算机语法
分析
定性数据统计分析pdf销售业绩分析模板建筑结构震害分析销售进度分析表京东商城竞争战略分析
工具。LEX 是相应的词法分析工具。在Linux 下,也有
YACC/LEX 的实现版本及相关资料。通过这套工具,可以在只编写出计算机语言的语法后,就可以生成自底向上的语法分析程序(词法分析类似),可以大大加快计算机语言的实现速度。Turbo Pascal/Free Pascal/Delphi 程序员请注意: Pascal 语言下的YACC/LEX 实现可以在
~ag/tply/ 地址下找到详细信息。有关YACC 和LEX 的语法我们附在后面。这里,我们主要讨论一个具体的语言(如Basic),如何用YACC/LEX 编程实现。代码存放在下载栏目中(c语言,用GCC 编译通过),可以任意使用,其它的源代码和例子也可以在那里找到。有关问题:1、首要问题:编译还是解释。如果选择编译,那么生成了目标机器上的可执行代码。如果选择解释,那么在解释过程中(或完成后)执行中间代码。Java和.NET 已经混淆了这两方面的区分。2、数
据属性问题:一个普通的编译/解释器必须随时跟踪变量、
表
关于同志近三年现实表现材料材料类招标技术评分表图表与交易pdf视力表打印pdf用图表说话 pdf
达式的数据类型、作用范围等问题。最头疼的就是数据类型了。因为编译/解释器必须自己处理不同数据类型的转换工作,如果有六种数据类型如Char、Byte、SmallInt、Word、LongInt、Dword,就必须处理32种计算方法。所以现在新的的语言如(VBScript 等)都采用了Variant 数据类型,
这样在计算过程中,不需要考虑过多的数据类型转换问题,在执行时才做类型检查。因为我们当时还不知道GCC的Lib 中支持Variant 数据类型,因此自己实现了Variant数据类型。// Variant 数据类型
#define NOTYPE 0
#define CHARTYPE 1
#define BYTETYPE 2
#define INTEGERTYPE 3
#define DWORDTYPE 4
#define REALTYPE 5
#define STRINGTYPE 6
#define INFOTYPE 7
#define TMPSTRINGSIZE 128 /* Variant Structure */ typedef struct {
int ValueType;
union {
Char Character;
Byte BYTE;
int Integer;
DWord DWORD;
double Real;
PTString pString;
void *pInfo;
} Value;
} TVariant, *PVariant; // Variant 过程PVariant VarNew(void);
void VarFree(PVariant p);
int VarGetType(PVariant p);
void VarSetType(PVariant p,int tp);
void VarAssign(PVariant dest,PVariant src); Char VarGetChar(PVariant p);
Byte VarGetByte(PVariant p);
int VarGetInteger(PVariant p);
DWord VarGetDWord(PVariant p);
double VarGetReal(PVariant p);
Char *VarGetString(PVariant p);
void *VarGetInfo(PVariant p);
void VarSetChar(PVariant p,Char c);
void VarSetByte(PVariant p,Byte b);
void VarSetInteger(PVariant p,int n);
void VarSetDWord(PVariant p,DWord d);
void VarSetReal(PVariant p,double e);
void VarSetString(PVariant p,Char *s);
void VarSetInfo(PVariant p,void *q);
int VarTypeCast(PVariant p,int datatype);
int VarMakeEqual(PVariant a,PVariant b);
void VarStrSetLength(PVariant p,DWord len);
void VarStrCompress(PVariant p);
DWord VarStrlen(PVariant p);
void VarStrToUpper(PVariant p);
void VarStrToLower(PVariant p);
int VarStrCompare(PVariant p,PVariant q);
int VarStrCompareCase(PVariant p,PVariant q);
void VarStrAssign(PVariant dest,PVariant src);
void VarStrCat(PVariant dest,PVariant src);
void VarStrDelete(PVariant p,DWord begin,DWord len); void VarStrGetChar(PVariant p,DWord offset);
void VarStrSetChar(PVariant p,DWord offset,Char c); DWord VarStrLocChar(PVariant p,DWord begin,Char c); DWord VarStrSubStr(PVariant p,PVariant sub); 3、符号
表:符号表用来登记各种常量、变量、函数、过程、结构的有关属性,因为一些数据类型是其它数据类型的导出,所以这里采用二叉数存放、检索信息。为了解决导出类型问题,此二叉数必须穿线。
typedef enum {
eNoDefine,eConstDefine,eTypeDefine,eVarDefine,eValPa ramDefine,
eVarParamDefine,eFieldDefine,
eProgDefine,eFuncDefine,eProcDefine
} TDefineKey;
typedef enum {
eDeclare,eForward,eStandard
} TRoutineKey;
typedef enum {
eNoForm,eScalarForm,eEnumForm,eSubRangeForm, eArrayForm,eRecordForm
} TypeForm;
typedef struct {
TDefineKey Key;
union {
PVariant pValue;
struct {
TRoutineKey Key;
int ParamCount;
int TotalParamSize;
int TotalLocalSize;
struct tagTSymbolTable *Params;
struct tagTSymbolTable *Locals;
struct tagTSymbolTable *LocalSymtab;
void *CodeSegment;
} Routine;
struct {
int Offset;
struct tagTSymbolTable *RecordIDP;
} Data;
}Info;
} TDefineStruct, *PDefineStruct; typedef struct tagTypeStruct {
TypeForm Form;
int Size;
struct tagTSymbolTable *TypeIDP;
union {
struct {
struct TypeStruct *ConstIDP;
int Max;
} Enum;
struct {
struct tagTypeStruct
*IndexType,*ElemType;
int MinIndex,MaxIndex;
int ElemCount;
} Array;
struct {
struct tagTSymbolTable
*FieldSymtab;
} Record;
} Info;
} TypeStruct, *PTypeStruct; typedef struct tagTSymbolTable {
char *Name;
struct tagTSymbolTable *Left,*Right,*Next; // 穿线二叉数
char *Info;
TDefineStruct Define;
PTypeStruct TypeP;
int Level;
int LabelIndex;
} TSymbolTable, *PSymbolTable; PSymbolTable NewSymtab(char *s);
void InitSymtabRoot(void);
extern TSymbolTable Root;
PSymbolTable SearchSymtab(char *s);
PSymbolTable EnterSymtab(char *s);
DWord GetVar(char *s);
void EnterVar(char *s,DWord index); 4、虚拟计算机:如果生成的代码目标平台无法执行或执行有困难,可以考虑生成虚拟计算机的代码,然后用自己的虚拟计算机执行。我们这里的虚拟计算机采用了栈结构方式。可以使YACC 在分析过程中同步生成代码。我们的虚拟机器代码和JAVA很相似(JAVA在SUN 中的实现,起初肯定是YACC)。这台虚拟计算机连Print 命令都认识。// 计算机的标志寄存器和控制寄存器
typedef struct tagTFlags {
Char EQ,NE,LE,LT,GE,GT;
Char Debug,Trace,Step;
} TFlags;
// 只有四个寄存器:当前代码地址、堆栈顶部、Stack Frame Top、标志及控制。
typedef struct tagTCPU {
DWord EIP;
DWord ESP;
DWord EBP;
TFlags Flags;
} TCPU, *PCPU; // 全局的CPU 变量
extern TCPU CPU;
// CPU 的动作
void Reset(void);
void Start(void);
void DeCode(DWord d);
void SetFlags(double r);
void Print(PVariant p);
void EnterProc(DWord n);
void LeaveProc(void); void PushChar(Char c);
void PushByte(Byte b);
void PushInteger(int value);
void PushDWord(DWord d);
void PushReal(double r);
void PushString(char *s);
void PushVar(PVariant p);
PVariant PopVar(void);
PVariant GetTosVar(void); // CPU 认识的指令
#define C_PUSHCHAR 101
#define C_PUSHBYTE 102
#define C_PUSHINTEGER 103
#define C_PUSHDWORD 104
#define C_PUSHREAL 105
#define C_PUSHSTRING 106 #define C_PUSHVAR 110 #define C_POPVAR 120
#define C_POPCMP 121 #define C_RELOP 200
#define C_ADDOP 201
#define C_MULOP 202
#define C_SIGNOP 203 #define C_PRINT_LINE 300
#define C_PRINT_COMMA 301
#define C_PRINT_SEMICOLON 302
#define C_PRINT_EXPR 303 #define C_JMP 400
#define C_JEQ 401
#define C_JNE 402 5、词法分析:我们使用LEX来做词法分析,查看LEX的代码后发现,它本身是用YACC生成的,很有意思。extern YYSTYPE yylval; int yywrap(void) { return 1;
} void SetReal(double r) {
yylval.Real=r;
yylval.Info.Type=REALTYPE;
}
void SetInteger(int n) {
yylval.Integer=n;
yylval.Info.Type=INTEGERTYPE;
}
void SetDWord(DWord n) {
yylval.UnsignedNumber=n;
yylval.Info.Type=DWORDTYPE;
}
void SetString(char *s) {
yylval.String=strdup(s);
yylval.Info.Type=STRINGTYPE;
} /* Delete any character in yyrval, normally is doublequota in string, etc:
"AAAAA""aaaaaaa" => AAAAA"aaaaaaa
*/
void DeleteChar(char c) {
char *s;
int i,j;
s=yylval.String;
i=strlen(s);
i-=2;
memmove(s,s+1,sizeof(Char)*i);
s[i]=0;
if(strlen(s)<2)
return;
for(i=0,j=0;*(s+j);i++,j++) {
*(s+i)=*(s+j);
if((*(s+j)==c)&&(*(s+j+1)==c))
j++;
}
*(s+i)=0;
}
%}
SPACE [ \r\n\t\f]
NQUOTE [^\"\n]
Digit [0-9]
DecDigit [1-9]
Zero [0]
OctDigit [0-7]
HexPrev [x|X]
HexDigit [0-7A-Fa-f]
Char [ -~]
Letter [A-Za-z_]
Id [A-Za-z0-9_] %%
"==" { SetInteger(EQUAL);return EQUAL; } "=" { SetInteger(ASSIGN);return ASSIGN; } "<" { SetInteger(LT);return LT; }
"<=" { SetInteger(LE);return LE; }
"<>" { SetInteger(NE);return NE; } ">=" { SetInteger(GE);return GE; }
">" { SetInteger(GT);return GT; } "+" { SetInteger(PLUS);return PLUS; } "-" { SetInteger(MINUS);return MINUS; } "*" { SetInteger(STAR);return STAR; } "/" { SetInteger(SLASH);return SLASH; } "%" { SetInteger(MOD);return MOD; }
"<<" { SetInteger(SHL);return SHL; } ">>" { SetInteger(SHR);return SHR; }
"&" { SetInteger(BITAND);return BITAND; } "|" { SetInteger(BITOR);return BITOR; }
"!" { SetInteger(BITNOT);return BITNOT; } "(" { SetInteger(LPAREN);return LPAREN; } ")" { SetInteger(RPAREN);return RPAREN; } "[" { SetInteger(LBRACKET);return LBRACKET; } "]" { SetInteger(RBRACKET);return RBRACKET; } "{" { SetInteger(BIGLPAREN);return BIGLPAREN; }
"}" { SetInteger(BIGRPAREN); return BIGRPAREN; }
"," { SetInteger(COMMA);return COMMA; } ";" { SetInteger(SEMICOLON);return SEMICOLON; }
":" { SetInteger(COLON);return COLON; } "." { SetInteger(DOT);return DOT;} "and"
{ SetInteger(AND);return AND; }
"not" { SetInteger(NOT);return NOT; }
"or" { SetInteger(OR);return OR; } "dim"
{ SetInteger(DIM);return DIM; }
"array" { SetInteger(ARRAY);return ARRAY; }
"as" { SetInteger(AS);return AS; }
"byval" { SetInteger(BYVAL);return BYVAL;} "case" { SetInteger(CASE);return CASE; } "const" { SetInteger(CONST);return CONST; } "function" { SetInteger(FUNCTION);return FUNCTION;} "goto" { SetInteger(GOTO);return GOTO;} "label" { SetInteger(LABEL);return LABEL;} "procedure" { SetInteger(PROCEDURE);return PROCEDURE;}
"program" { SetInteger(PROGRAM);return PROGRAM; } "char" { SetInteger(CHAR);return CHAR; } "byte" { SetInteger(BYTE);return BYTE; } "integer" { SetInteger(INTEGER);return INTEGER; } "dword" { SetInteger(DWORD);return DWORD; } "real" { SetInteger(REAL);return REAL; } "string" { SetInteger(STRING);return STRING; } "if" { SetInteger(IF);return IF; }
"then" { SetInteger(THEN);return THEN; } "else" { SetInteger(ELSE);return ELSE; } "for" { SetInteger(FOR);return FOR; } "while" { SetInteger(WHILE);return WHILE; }
"to" { SetInteger(TO);return TO; } "downto" { SetInteger(DOWNTO);return DOWNTO; }
"do" { SetInteger(DO);return DO; }
"of" { SetInteger(OF);return OF; }
"record" { SetInteger(RECORD);return RECORD; } "with" { SetInteger(WITH);return WITH;} ("quit"|"q")
{ SetInteger(EXIT);return EXIT; }
("exit"|"e") { SetInteger(EXIT);return EXIT; } ("print"|"?") {SetInteger(PRINT);return PRINT;}
"run" { SetInteger(RUN);return RUN;} {Letter}{Id}* {
/* ID */
SetString(yytext);
return ID;
} \"({NQUOTE}|\"\")*\" {
/* short string */
SetString(yytext);
DeleteChar('\"');
return
SHORTSTRING;
}
{DecDigit}{Digit}* {
/* dec */ SetDWord(strtoul(yytext,NULL,10));
return(UNSIGNED_NUMBER);
}
{Zero}{OctDigit}* { /* oct */ SetDWord(strtoul(yytext,NULL,8));
return(UNSIGNED_NUMBER);
}
{Zero}{HexPrev}{HexDigit}+ { /* hex */ SetDWord(strtoul(yytext,NULL,16));
return(UNSIGNED_NUMBER);
}
{Digit}+"."{Digit}+ {
/* float */ SetReal(atof(yytext));
return(REALNUMBER);
}
{Digit}+"."{Digit}+[Ee][+-]?{Digit}+ {
/* sce */ SetReal(atof(yytext));
return(REALNUMBER);
}
"//".* ; { /* line comments */ }
{SPACE} ;
. |
%% 6、语法分析:使用YACC来生成语法数。这里同时就生成了代码,没有考虑代码优化的问题。%{
%}
// 这是Token 的数据结构
%Union {
int Integer;
DWord UnsignedNumber;
double Real;
Char *String;
struct {
double noused;
int Type;
} Info;
} %token UNSIGNED_NUMBER REALNUMBER SHORTSTRING ID
%token LT LE EQUAL NE GE GT ASSIGN
%token PLUS MINUS STAR SLASH MOD SHL SHR BITNOT BITAND BITOR
%token LPAREN RPAREN OR AND NOT COMMA SEMICOLON COLON DOT
%token LBRACKET RBRACKET BIGLPAREN BIGRPAREN
%token DIM AS ARRAY CASE FUNCTION PROCEDURE PROGRAM LABEL
%token CHAR BYTE INTEGER DWORD REAL STRING %token RECORD CONST BYVAL
%token IF THEN ELSE FOR TO DOWNTO DO WHILE OF
GOTO WITH
%token EXIT PRINT RUN %type
REALNUMBER
%type UNSIGNED_NUMBER
%type SHORTSTRING ID
%type LT LE EQUAL NE GE GT ASSIGN %type PLUS MINUS STAR SLASH MOD SHL SHR BITNOT BITAND BITOR
%type LPAREN RPAREN OR AND NOT COMMA SEMICOLON COLON DOT
%type LBRACKET RBRACKET BIGLPAREN BIGRPAREN
%type DIM AS ARRAY CASE FUNCTION PROCEDURE PROGRAM LABEL
%type CHAR BYTE INTEGER DWORD REAL STRING
%type IF THEN ELSE FOR TO DOWNTO WHILE OF GOTO WITH
%type RECORD CONST BYVAL
%type EXIT PRINT RUN %type
relop addop mulop signop datatype logicop %type variable variable_list label
%type primary factor term expression simple_expression expr expr_list
%type compilation_unit program program_header block
%type decl_sect_list decl_sect proc_decl func_decl param_list
%type proc_header func_header
proc_block fp_list fp_sect_list fp_sect
%type compound_statement stmt_list stmt normal_stmt
%type dim_statement goto_statement
for_statement if_statement
%type while_statement with_statement assign_statement proccall_statement
%type run_statement
%type print_statement print_expr_list print_dot
%right THEN ELSE // 个别需要右结合的
Token %% compilation_unit:program
;
program:program_header block
;
program_header: {}
|PROGRAM
|PROGRAM ID SEMICOLON
;
block :decl_sect_list compound_statement ;
decl_sect_list: {}
|decl_sect_list decl_sect
;
decl_sect:proc_decl
|func_decl
;
proc_decl:proc_header proc_block
;
func_decl:func_header proc_block
;
proc_header:PROCEDURE ID fp_list
;
func_header:FUNCTION ID fp_list AS datatype ;
proc_block:block
;
fp_list:{}
|LPAREN fp_sect_list RPAREN
;
fp_sect_list:fp_sect
|fp_sect_list SEMICOLON fp_sect
;
fp_sect:variable_list AS datatype
|BYVAL variable_list AS datatype
;
compound_statement:BIGLPAREN stmt_list BIGRPAREN ;
stmt_list:stmt
|stmt_list SEMICOLON stmt
;
stmt:normal_stmt
|lab