Path: csiph.com!newsfeed.hal-mli.net!feeder3.hal-mli.net!newsfeed.hal-mli.net!feeder1.hal-mli.net!nntp.club.cc.cmu.edu!newsfeed.news.ucla.edu!usenet.stanford.edu!news.iecc.com!nerds-end
From: Jens Kallup
Newsgroups: comp.compilers
Subject: Re: Question about parser/parsing technics
Date: Tue, 06 Mar 2012 19:52:29 +0100
Organization: CNNTP
Lines: 363
Sender: news@iecc.com
Approved: comp.compilers@iecc.com
Message-ID: <12-03-008@comp.compilers>
References: <12-03-006@comp.compilers>
NNTP-Posting-Host: news.iecc.com
X-Trace: leila.iecc.com 1331069508 29984 64.57.183.58 (6 Mar 2012 21:31:48 GMT)
X-Complaints-To: abuse@iecc.com
NNTP-Posting-Date: Tue, 6 Mar 2012 21:31:48 +0000 (UTC)
Keywords: parse
Posted-Date: 06 Mar 2012 16:31:48 EST
X-submission-address: compilers@iecc.com
X-moderator-address: compilers-request@iecc.com
X-FAQ-and-archives: http://compilers.iecc.com
Xref: csiph.com comp.compilers:475

Hello,

you can try this grammar for a simple language:

//here the lexer.l
//compile: flex lexer.l
%{
#include "syntaxtree.h"
#include "string.h"
/* NOTE(review): the names of the next two headers were lost when the post was
 * archived. FILE and atof() are used below, which need <stdio.h> and
 * <stdlib.h> -- confirm against the original file. */
#include <stdio.h>
#include <stdlib.h>
#include "y.tab.cc"

extern int lineno;
char num_text[2048];

#ifdef FLEX_SCANNER
#define INPUT_EOF EOF
#else
#define INPUT_EOF 0
#endif

/* Returning 1 tells the scanner that no further input follows end-of-file. */
extern "C" int yywrap() { return 1; }

/* Runs yyparse() once on the scanner's current input.
 * `fp` is not used: the buffer-switching code that consumed it is disabled. */
void start_code_parser(FILE *fp)
{
/*
    YY_BUFFER_STATE old_flexer = YY_CURRENT_BUFFER;
    YY_BUFFER_STATE new_flexer = yy_create_buffer(fp,YY_BUF_SIZE);
    yy_switch_to_buffer(new_flexer);

    while (yyparse() != INPUT_EOF)

    yy_delete_buffer(new_flexer);
    if (old_flexer != NULL)
    yy_switch_to_buffer(old_flexer);
*/
    yyparse();
}

extern void RestartApplication(void);
%}

%x COMMENT
%x STRING_BEG
%x STRING_END

%%

    /* Line comments: "** ..." and "// ..." run to the end of the line. */
"\*\*".*\n      { BEGIN(INITIAL); lineno++; }
"//".*\n        { lineno++; }

    /* Block comments. COMMENT is an exclusive state, so only the three
     * <COMMENT> rules are active inside it; newlines are still counted. */
"/*"            { BEGIN(COMMENT); }
<COMMENT>"*/"   { BEGIN(INITIAL); }
<COMMENT>\n     { lineno++; }
<COMMENT>.      ;

    /* Keywords. They precede the identifier rule so that they win ties. */
"if"            { BEGIN(INITIAL); return _IF_;}
"else"          { BEGIN(INITIAL); return _ELSE_;}
"endif"         { BEGIN(INITIAL); return _END_IF_;}
"repeat"        { BEGIN(INITIAL); return _REPEAT_;}
"until"         { BEGIN(INITIAL); return _UNTIL_;}
"scan"          { BEGIN(INITIAL); return _SCAN_;}
(\?)            { BEGIN(INITIAL); return _PRINT_;}
"local"         { BEGIN(INITIAL); return _LOCAL_;}
"parameter"     { BEGIN(INITIAL); return _PARAMETER_;}
"endfor"        { BEGIN(INITIAL); return _ENDFOR_;}
"new"           { BEGIN(INITIAL); return _NEW_;}
"for"           { BEGIN(INITIAL); return _FOR_;}
"while"         { BEGIN(INITIAL); return _WHILE_;}
"return"        { BEGIN(INITIAL); return _RETURN_;}

    /* Comparison operators; "=>" and "=<" are accepted as alternative
     * spellings. '<' and '>' are returned as the character literals the
     * grammar's exp rule refers to. */
"=="            { BEGIN(INITIAL); return _EQUAL_;}
">="            { BEGIN(INITIAL); return _GREQL_;}
"<="            { BEGIN(INITIAL); return _LWEQL_;}
"=>"            { BEGIN(INITIAL); return _GREQL_;}
"=<"            { BEGIN(INITIAL); return _LWEQL_;}
"<"             { BEGIN(INITIAL); return '<'; }
">"             { BEGIN(INITIAL); return '>'; }

"++"            { BEGIN(INITIAL); return _PLUS_PLUS_; }
"--"            { BEGIN(INITIAL); return _MINUS_MINUS_; }

"+="            { BEGIN(INITIAL); return _PLUS_ASSIGN_;  }
"-="            { BEGIN(INITIAL); return _MINUS_ASSIGN_; }
"*="            { BEGIN(INITIAL); return _TIMES_ASSIGN_; }
"/="            { BEGIN(INITIAL); return _DIV_ASSIGN_;   }

"class"         { BEGIN(INITIAL); return _CLASS_; }
"of"            { BEGIN(INITIAL); return _OF_; }
"endclass"      { BEGIN(INITIAL); return _ENDCLASS_; }
"set"           { BEGIN(INITIAL); return _SET_; }
"with"          { BEGIN(INITIAL); return _WITH_; }
"endwith"       { BEGIN(INITIAL); return _ENDWITH_; }
"to"            { BEGIN(INITIAL); return _TO_; }
"precision"     { BEGIN(INITIAL); return _PRECISION_; }
"round"         { BEGIN(INITIAL); return _ROUND_; }
"space"         { BEGIN(INITIAL); return _SPACE_; }
"replicate"     { BEGIN(INITIAL); return _REPLICATE_; }
"trim"          { BEGIN(INITIAL); return _TRIM_; }
"rtrim"         { BEGIN(INITIAL); return _RTRIM_; }
"ltrim"         { BEGIN(INITIAL); return _LTRIM_; }

    /* Built-in math functions; the spelling is passed along in yylval.name. */
"abs"           { BEGIN(INITIAL); yylval.name = strdup(yytext); return _ABS_;  }
"sin"           { BEGIN(INITIAL); yylval.name = strdup(yytext); return _SIN_;  }
"cos"           { BEGIN(INITIAL); yylval.name = strdup(yytext); return _COS_;  }
"tan"           { BEGIN(INITIAL); yylval.name = strdup(yytext); return _TAN_;  }
"asin"          { BEGIN(INITIAL); yylval.name = strdup(yytext); return _ASIN_; }
"acos"          { BEGIN(INITIAL); yylval.name = strdup(yytext); return _ACOS_; }
"atan"          { BEGIN(INITIAL); yylval.name = strdup(yytext); return _ATAN_; }
"sqrt"          { BEGIN(INITIAL); yylval.name = strdup(yytext); return _SQRT_; }

"="             { BEGIN(INITIAL); return _ASSIGN_; }
"+"             { BEGIN(INITIAL); return _PLUS_;   }
"-"             { BEGIN(INITIAL); return _MINUS_;  }
"*"             { BEGIN(INITIAL); return _TIMES_;  }
"/"             { BEGIN(INITIAL); return _DIV_;}
"("             { BEGIN(INITIAL); return _OBR_;}
")"             { BEGIN(INITIAL); return _CBR_;}
","             { BEGIN(INITIAL); return _COMMA_; }

    /* Numeric literal: digits with an optional fractional part.
     * NOTE(review): the optional sign is part of the literal because the
     * grammar has no unary minus; as a consequence "a-1" (without blanks)
     * scans as identifier + number. Confirm whether that is intended. */
[-+]?[0-9]+(\.[0-9]*)? {
                  BEGIN(INITIAL);
                  yylval.val = (double) atof(yytext);
                  return _NUM_;
                }

[a-zA-Z_]+[0-9a-zA-Z_]* {
                  yylval.name = strdup(yytext);
                  return _ID_;
                }

    /* String literal: opening quote, body (STRING_BEG), closing quote
     * (STRING_END). An immediately closing quote yields an empty string. */
\"              { BEGIN(STRING_BEG); }
<STRING_BEG>[0-9a-zA-Z_ \t\.\!\=\?\(\)\[\]\:\.\,\+\-\*\/\^]+ {
                  yylval.name = strdup(yytext);
                  BEGIN(STRING_END);
                }
<STRING_BEG>\"  { yylval.name = strdup(""); BEGIN(INITIAL); return _STRING_; }
<STRING_END>\"  { BEGIN(INITIAL); return _STRING_; }

    /* One rule firing per newline keeps lineno exact. */
\n              { lineno++; }
[ \t]+          ;
[\.]            { BEGIN(INITIAL); return _POINT_; }

    /* Anything else: report the character, reset the generator state that
     * is reset here, and restart the application. */
.               {
                  ::yyrestart(yyin);
                  QMessageBox::about(NULL,"pFehler",QString("Fehlerzeichen: %1 in Zeile: %2").arg(yytext[0]).arg(lineno));
                  lineno = 1;
                  if_label = 0;
                  expr_type = 2;
                  text_counter = 0;
                  char_counter = 0;
                  nvar_counter = 0;
                  RestartApplication();
                }
%%

// here the bison lang-y file
// compile: bison.exe -d lang.y
%{
/* NOTE(review): the header names were lost when the post was archived, and the
 * author elided further includes with ",,,". QString and QMessageBox are used
 * by the actions below -- confirm the real include list. */
#include <QString>
#include <QMessageBox>
/* ,,, */
%}

%union {
    double val;
    char* name;
    char* str;
    int id;
    int label;
};

%token _IF_ _THEN_ _ELSE_ _END_IF_ _REPEAT_ _UNTIL_ _SCAN_ _PRINT_ _ID_ _NUM_ _EQUAL_ _FOR_ _WHILE_ _RETURN_ _DBLNUM_
%token _LWEQL_ _GREQL_ _ASSIGN_ _PLUS_ _MINUS_ _TIMES_ _DIV_ _STRBE_ _COMMA_ _TRIM_ _LTRIM_ _RTRIM_
%token _NEW_ _LOCAL_ _PARAMETER_ _ENDFOR_
%token _MINUS_ASSIGN_ _TIMES_ASSIGN_ _DIV_ASSIGN_ _PLUS_ASSIGN_ _ROUND_ _SET_ _TO_ _PRECISION_
%token _CLASS_ _OF_ _ENDCLASS_ _WITH_ _ENDWITH_ _POINT_
%token _ASIN_ _ACOS_ _ATAN_ _SIN_ _COS_ _SQRT_ _TAN_ _STRING_ _OBR_ _CBR_ _SPACE_
%token _PLUS_PLUS_ _MINUS_MINUS_
%token _ABS_ _REPLICATE_

/* The scanner stores numbers in yylval.val and identifiers in yylval.name. */
%type <val>  _NUM_
%type <name> _ID_
/* NOTE(review): the tag of the following declarations was lost in the archive.
 * <id> is a placeholder; none of the visible actions reads these values. */
%type <id> stmt_seq stmt if_stmt repeat_stmt for_stmt while_stmt assign_stmt read_stmt write_stmt
%type <id> exp simple_exp term factor exec_stmt handle_string ari_stmt set_stmt class_stmt with_stmt with_this
%type <id> object_vars class_cmd_stmt misc_stmt local_stmt

%%

/* A program is a possibly empty sequence of statements. Only stmt_seq derives
 * the empty input, so there is exactly one parse for every statement list. */
program
    : stmt_seq          { }
    ;

stmt_seq
    : /* empty */       { }
    | stmt_seq stmt     { }
    ;

stmt
    : if_stmt           { }
    | repeat_stmt       { }
    | assign_stmt       { }
    | read_stmt         { }
    | write_stmt        { }
    | for_stmt          { }
    | while_stmt        { }
    | ari_stmt          { }
    | set_stmt          { }
    | class_stmt        { }
    | misc_stmt         { }
    ;

/* ID = exp: the expression result is popped from the FPU stack into the
 * variable's _LC<name> slot; the slot is declared on first use. */
assign_stmt
    : _ID_ _ASSIGN_ { assign_flag = true; } exp {
        code_str += QString("\tfstp qword [_LC%1]\n").arg($1);
        if (!symbol_labels.contains(QString("_LC%1").arg($1))) {
            data_str += QString("_LC%1: dq 0.00\n").arg($1);
            symbol_labels << QString("_LC%1").arg($1);
        }
        assign_flag = false;
    }
    ;

/* Comparisons only record which relation was seen in expr_type. */
exp
    : simple_exp '<' simple_exp      { expr_type = 0; }
    | simple_exp '>' simple_exp      { expr_type = 1; }
    | simple_exp _EQUAL_ simple_exp  { expr_type = 2; }
    | simple_exp _GREQL_ simple_exp  { expr_type = 3; }
    | simple_exp _LWEQL_ simple_exp  { expr_type = 4; }
    | simple_exp                     { }
    ;

/* Left-recursive, so a - b - c groups as (a - b) - c. When an action runs the
 * left operand is in st1 and the right operand in st0. */
simple_exp
    : simple_exp _PLUS_ term   { code_str += QString("\tfaddp st1, st0\n"); }
    | simple_exp _MINUS_ term  { code_str += QString("\tfsubp st1, st0\n"); }
    | term                     { }
    ;

/* Left-recursive for the same reason: a / b / c groups as (a / b) / c. */
term
    : term _TIMES_ factor      { code_str += QString("\tfmulp st1, st0\n"); }
    | term _DIV_ factor        { code_str += QString("\tfdivp st1, st0\n"); }
    | factor                   { }
    ;

factor
    : _OBR_ exp _CBR_ {
        /* The inner expression already left its value on the FPU stack. */
    }
    | _NUM_ {
        /* Every literal gets its own numbered data slot and is loaded from it. */
        temp_str = QString("%1").arg((double)$1);
        if (temp_str.contains('.') == false)
            temp_str += QString(".00");
        nvar_counter++;
        code_str.append(QString("\tfld qword [_LC%1]\n").arg(nvar_counter-1));
        data_str += QString("_LC%1:\tdq %2\n").arg(nvar_counter-1).arg(temp_str);
        last_result = NUMBER;
    }
    | _ROUND_ _OBR_ { assign_flag = true; } exp {
        /* First argument: spill st0 to _LCtmp and push those 8 bytes. */
        code_str += QString(
            "\tfstp qword [_LCtmp]\n"
            "\tpush dword [_LCtmp+4]\n"
            "\tpush dword [_LCtmp]\n"
        );
    } _COMMA_ exp _CBR_ {
        /* Second argument, passed the same way, then the call.
         * NOTE(review): no stack cleanup is emitted after the call;
         * presumably _round_val removes its own arguments -- confirm. */
        code_str += QString(
            "\tfstp qword [_LCtmp]\n"
            "\tpush dword [_LCtmp+4]\n"
            "\tpush dword [_LCtmp]\n"
            "\tcall _round_val\n"
        );
    }
    | _ID_ {
        last_result = 0;
        //if (do_function_call)
        code_str += QString("\tfld qword [_LC%1]\n").arg($1);
        FLastID = $1;
        stack_counter += 4;
        if (!symbol_labels.contains(QString("_LC%1").arg($1))) {
            data_str += QString("_LC%1: dq 0.00\n").arg($1);
            symbol_labels << QString("_LC%1").arg($1);
        }
    }
    | _ID_ _PLUS_PLUS_ {
        /* Leaves the old value on the stack and stores _LCtmp_1 + x.
         * NOTE(review): assumes _LCtmp_1 holds 1.0; it is not defined in
         * the code shown here. */
        code_str += QString(
            "\tfld qword [_LC%1]\n"
            "\tfld qword [_LCtmp_1]\n"
            "\tfadd qword [_LC%1]\n"
            "\tfstp qword [_LC%1]\n"
        ).arg($1);
    }
    | _ID_ _MINUS_MINUS_ {
        /* Leaves the old value on the stack and stores x - _LCtmp_1
         * (fsubr computes memory operand minus st0). */
        code_str += QString(
            "\tfld qword [_LC%1]\n"
            "\tfld qword [_LCtmp_1]\n"
            "\tfsubr qword [_LC%1]\n"
            "\tfstp qword [_LC%1]\n"
        ).arg($1);
    }
    | _SIN_  _OBR_ exp _CBR_ { internal_pusher("sin" ); }
    | _ASIN_ _OBR_ exp _CBR_ { internal_pusher("asin"); }
    | _ACOS_ _OBR_ exp _CBR_ { internal_pusher("acos"); }
    | _ATAN_ _OBR_ exp _CBR_ { internal_pusher("atan"); }
    | _COS_  _OBR_ exp _CBR_ { internal_pusher("cos" ); }
    | _TAN_  _OBR_ exp _CBR_ { internal_pusher("tan" ); }
    | _SQRT_ _OBR_ exp _CBR_ { internal_pusher("sqrt"); }
    | _ABS_  _OBR_ exp _CBR_ {
        /* fabs replaces st0 by its absolute value. */
        code_str += QString("\tfabs\n");
    }
    ;

endif_stmt
    : _END_IF_ { }
    ;

%%

/* Parser error callback: shows the message with the current line number and
 * restarts the scanner on yyin. */
void yyerror(char * message)
{
    QMessageBox::about(NULL,"error",QString("%1: line %2").arg(message).arg(lineno));
    ::yyrestart(yyin);
}

/* Appends the code for a one-argument math function whose argument is in st0.
 *
 * "sin", "cos" and "sqrt" are emitted as the x87 instruction f<id>, followed
 * by a store/reload through the slot _LC<id>.
 * NOTE(review): the _LC<id> slots are not declared in the code shown here.
 *
 * "asin", "acos", "atan" and "tan" are emitted as a call to _<id>: the
 * argument is spilled to _LCtmp and pushed as two dwords, and exactly those
 * 8 bytes are released after the call. Any other id emits nothing. */
void internal_pusher(QString id)
{
    if (id == "sin" || id == "cos" || id == "sqrt") {
        code_str += QString(
            "\tf%1\n"
            "\tfstp qword [_LC%1]\n"
            "\tfld qword [_LC%1]\n"
        ).arg(id);
    }
    else if (id == "asin" || id == "acos" || id == "atan" || id == "tan") {
        code_str += QString(
            "\tfstp qword [_LCtmp]\n"
            "\tpush dword [_LCtmp+4]\n"
            "\tpush dword [_LCtmp]\n"
            "\tcall _%1\n"
            "\tadd esp, 8\n").arg(id);
    }
}

cheers
Jens