Groups | Search | Server Info | Keyboard shortcuts | Login | Register [http] [https] [nntp] [nntps]


Groups > comp.compilers > #475

Re: Question about parser/parsing technics

From Jens Kallup <jkallup@web.de>
Newsgroups comp.compilers
Subject Re: Question about parser/parsing technics
Date 2012-03-06 19:52 +0100
Organization CNNTP
Message-ID <12-03-008@comp.compilers> (permalink)
References <12-03-006@comp.compilers>

Show all headers | View raw


Hello,

you can try this grammar for a simple language:

// Here is the lexer: lexer.l
// Compile with: flex lexer.l

%{
#include "syntaxtree.h"
#include "string.h"
#include <stdlib.h>
#include <QMessageBox>
#include "y.tab.cc"

extern int lineno;
char num_text[2048];

#ifdef FLEX_SCANNER
#define INPUT_EOF EOF
#else
#define INPUT_EOF 0
#endif

extern "C" int yywrap() { return 1; }

void start_code_parser(FILE *fp)
{
     /*
     YY_BUFFER_STATE old_flexer = YY_CURRENT_BUFFER;
     YY_BUFFER_STATE new_flexer = yy_create_buffer(fp,YY_BUF_SIZE);
     yy_switch_to_buffer(new_flexer);
     while (yyparse() != INPUT_EOF)

     yy_delete_buffer(new_flexer);
     if (old_flexer != NULL)
     yy_switch_to_buffer(old_flexer);
     */
     yyparse();
}

extern void RestartApplication(void);
%}

%x COMMENT
%x STRING_BEG
%x STRING_END

%%

"\*\*".*\n		{ /* two stars: comment up to end of line */ lineno++; }

	/* Block comments. COMMENT is an exclusive start condition, so every
	   character inside the comment must be consumed by one of the rules
	   below; otherwise the default rule would echo it to the output.
	   The terminator is matched on its own so that a comment ends at the
	   FIRST closing marker, and newlines are counted one at a time so
	   that lineno stays correct across multi-line comments. */
"/*"                    { BEGIN(COMMENT); }
<COMMENT>[^*\n]+        ;
<COMMENT>"*"+[^*/\n]*   ;
<COMMENT>\n             { lineno++; }
<COMMENT>"*"+"/"        { BEGIN(INITIAL); }

"//".*\n    { lineno++; }
"if"        {
              /* Keyword and operator rules. COMMENT and STRING_* are
                 exclusive start conditions, so these un-prefixed rules are
                 only active in INITIAL and need no start-condition reset. */
              return _IF_;
            }
"else"      { return _ELSE_; }
"endif"     { return _END_IF_; }
"repeat"    { return _REPEAT_; }
"until"     { return _UNTIL_; }
"scan"      { return _SCAN_; }
(\?)        { return _PRINT_; }
"local"     { return _LOCAL_; }
"parameter" { return _PARAMETER_; }
"endfor"    { return _ENDFOR_; }
"new"       { return _NEW_; }
"for"       { return _FOR_; }
"while"     { return _WHILE_; }
"return"    { return _RETURN_; }

	/* Comparison operators. "=>" and "=<" are accepted as alternate
	   spellings of ">=" and "<=". */
"=="        { return _EQUAL_; }
">="        { return _GREQL_; }
"<="        { return _LWEQL_; }
"=>"        { return _GREQL_; }
"=<"        { return _LWEQL_; }

	/* The grammar's exp rule refers to these two as character tokens;
	   without the rules below they would fall through to the
	   catch-all error rule. */
"<"         { return '<'; }
">"         { return '>'; }

"++"        { return _PLUS_PLUS_; }
"--"        { return _MINUS_MINUS_; }
"+="        { return _PLUS_ASSIGN_; }
"-="        { return _MINUS_ASSIGN_; }
"*="        { return _TIMES_ASSIGN_; }
"/="        { return _DIV_ASSIGN_; }
"class"     { return _CLASS_; }
"of"        { return _OF_; }
"endclass"  { return _ENDCLASS_; }
"set"       { return _SET_; }
"with"      { return _WITH_; }
"endwith"   { return _ENDWITH_; }
"to"        { return _TO_; }
"precision" { return _PRECISION_; }
"round"     { return _ROUND_; }
"space"     { return _SPACE_; }
"replicate" { return _REPLICATE_; }
"trim"      { return _TRIM_; }
"rtrim"     { return _RTRIM_; }
"ltrim"     { return _LTRIM_; }

	/* Built-in math functions also hand their spelling to the parser. */
"abs"       { yylval.name = strdup(yytext); return _ABS_; }
"sin"       { yylval.name = strdup(yytext); return _SIN_; }
"cos"       { yylval.name = strdup(yytext); return _COS_; }
"tan"       { yylval.name = strdup(yytext); return _TAN_; }
"asin"      { yylval.name = strdup(yytext); return _ASIN_; }
"acos"      { yylval.name = strdup(yytext); return _ACOS_; }
"atan"      { yylval.name = strdup(yytext); return _ATAN_; }
"sqrt"      { yylval.name = strdup(yytext); return _SQRT_; }

"="         { return _ASSIGN_; }
"+"         { return _PLUS_; }
"-"         { return _MINUS_; }
"*"         { return _TIMES_; }
"/"         { return _DIV_; }
"("         { return _OBR_; }
")"         { return _CBR_; }
","         { return _COMMA_; }
[-+]?[0-9]+(\.[0-9]*)?	{
		/* Numeric literal: digits with at most one optional fraction.
		   The previous character class also admitted apostrophes and
		   repeated dots, which atof() silently truncated.
		   NOTE(review): the optional sign is part of the literal, so
		   input such as a-1 scans as an identifier followed by the
		   number -1; confirm this is what the grammar expects. */
		yylval.val = atof(yytext);
		return _NUM_;
	}
[a-zA-Z_][0-9a-zA-Z_]*	{
		/* Identifier: the parser receives a heap copy of the text. */
		yylval.name = strdup(yytext);
		return _ID_;
	}
(\")				{
		/* Opening quote: switch to the string-body state. */
		BEGIN(STRING_BEG);
	}
<STRING_BEG>([0-9a-zA-Z_ \t\.\!\=\?\(\)\[\]\:\.\,\+\-\*\/\^]*)	{
		/* String body: keep a heap copy of the text for the parser,
		   then wait for the closing quote.
		   NOTE(review): characters outside this class are not matched
		   by any rule in this exclusive state. */
		BEGIN(STRING_END);
		yylval.name = strdup(yytext);
	}
<STRING_END>(\")		{
		/* Closing quote: the token is delivered only now. */
		BEGIN(INITIAL);
		return _STRING_;
	}
\n          { /* one newline per match, so every line is counted */ lineno++; }
[ \t]+      ;
[\.]	{ return _POINT_; }
.	{
		/* Catch-all: any character not matched above is an error.
		   The message is shown BEFORE the scanner is restarted,
		   because restarting resets the scanner buffer that yytext
		   refers to. */
		QMessageBox::about(NULL,"pFehler",
			QString("Fehlerzeichen: %1 in Zeile: %2").arg(yytext[0]).arg(lineno));
		::yyrestart(yyin);

		/* Reset the translator state before starting over. */
		lineno = 1;

		if_label  = 0;
		expr_type = 2;

		text_counter = 0;
		char_counter = 0;
		nvar_counter = 0;

		RestartApplication();
	}
%%


// Here is the bison grammar file: lang.y
// Compile with: bison.exe -d lang.y
%{
#include <stdio.h>
#include <stdlib.h>
/* ... (further includes and declarations omitted in this post) ... */
%}

/* Semantic value carried by tokens and nonterminals. */
%union {
	double  val;    /* numeric literal value, used by _NUM_ */
	char   *name;   /* text of identifiers, strings and tagged tokens */
	char   *str;    /* not referenced by the declarations shown here */
	int     id;     /* value type of the statement/expression nonterminals */
	int     label;  /* not referenced by the declarations shown here */
};

/* Untyped tokens (declaration order is kept, so token numbers are unchanged). */
%token _IF_ _THEN_ _ELSE_ _END_IF_ _REPEAT_ _UNTIL_ _SCAN_ _PRINT_ _ID_
%token _NUM_ _EQUAL_ _FOR_ _WHILE_ _RETURN_ _DBLNUM_
%token _LWEQL_ _GREQL_ _ASSIGN_ _PLUS_ _MINUS_ _TIMES_ _DIV_ _STRBE_
%token _COMMA_ _TRIM_ _LTRIM_ _RTRIM_
%token _NEW_ _LOCAL_ _PARAMETER_ _ENDFOR_
%token _MINUS_ASSIGN_ _TIMES_ASSIGN_ _DIV_ASSIGN_ _PLUS_ASSIGN_ _ROUND_
%token _SET_ _TO_ _PRECISION_
%token _CLASS_ _OF_ _ENDCLASS_ _WITH_ _ENDWITH_ _POINT_

/* Tokens whose semantic value is the <name> union member. */
%token <name> _ASIN_ _ACOS_ _ATAN_ _SIN_ _COS_ _SQRT_ _TAN_ _STRING_
%token <name> _OBR_ _CBR_ _SPACE_
%token <name> _PLUS_PLUS_ _MINUS_MINUS_
%token <name> _ABS_ _REPLICATE_

/* Value types of the literal and identifier tokens. */
%type <val> _NUM_
%type <name> _ID_

/* Nonterminals all carry the <id> union member. */
%type <id> stmt_seq stmt  if_stmt repeat_stmt for_stmt while_stmt
%type <id> assign_stmt read_stmt write_stmt
%type <id> exp simple_exp term factor exec_stmt handle_string ari_stmt
%type <id> set_stmt class_stmt with_stmt with_this
%type <id> object_vars class_cmd_stmt misc_stmt local_stmt

/* End of the declarations section; the grammar rules follow. */
%%


/* A program is a possibly empty sequence of statements.
   The empty case is expressed in exactly one place (stmt_seq) and the
   sequence is built by left recursion only. Having an empty alternative in
   both rules, plus a separate single-statement alternative, made the
   grammar ambiguous for empty and one-statement inputs. */
program
     : stmt_seq {  }
     ;

stmt_seq
     : /* empty */ { }
     | stmt_seq stmt { }
     ;

stmt
     /* One statement: exactly one of the statement kinds below. The
        actions are empty; all work happens in the sub-rules. */
     : if_stmt      { }
     | repeat_stmt  { }
     | assign_stmt  { }
     | read_stmt    { }
     | write_stmt   { }
     | for_stmt     { }
     | while_stmt   { }
     | ari_stmt     { }
     | set_stmt     { }
     | class_stmt   { }
     | misc_stmt    { }
     ;


/* Assignment: identifier = expression.
   assign_flag is raised while the right-hand side is being parsed. After
   the expression, a store from the x87 stack top into the variable label
   is emitted, and the label is added to the data section the first time
   the variable is seen. */
assign_stmt
     : _ID_ _ASSIGN_ { assign_flag = true; } exp
     {
         /* The label is derived once from the identifier text. */
         const QString label = QString("_LC%1").arg($1);

         code_str += QString("\tfstp qword [%1]\n").arg(label);

         if (!symbol_labels.contains(label))
         {
             data_str += QString("%1: dq 0.00\n").arg(label);
             symbol_labels << label;
         }

         assign_flag = false;

         /* The scanner handed over a strdup copy of the identifier; it has
            been copied into the strings above and is not kept anywhere by
            this rule, so release it here. */
         free($1);
     }
;

exp
     /* Optional comparison of two simple expressions. expr_type records
        which operator was seen: 0 less, 1 greater, 2 equal,
        3 greater-or-equal, 4 less-or-equal. A bare simple expression
        leaves expr_type untouched. */
     : simple_exp '<'     simple_exp    { expr_type = 0; }
     | simple_exp '>'     simple_exp    { expr_type = 1; }
     | simple_exp _EQUAL_ simple_exp    { expr_type = 2; }
     | simple_exp _GREQL_ simple_exp    { expr_type = 3; }
     | simple_exp _LWEQL_ simple_exp    { expr_type = 4; }
     | simple_exp                       { }
     ;

/* Additive expressions, left-recursive so that + and - associate to the
   left. Each operand pushes its value and the operator is emitted as soon
   as its two operands are complete, so a - b - c generates (a - b) - c.
   The former right-recursive form emitted the rightmost operator first and
   therefore produced a - (b - c). */
simple_exp
     : simple_exp _PLUS_ term
     {
	code_str += QString("\tfaddp st1, st0\n");
     }
     | simple_exp _MINUS_ term
     {
	code_str += QString("\tfsubp st1, st0\n");
     }
     | term { }
     ;

/* Multiplicative expressions, left-recursive so that * and / associate to
   the left: a / b / c generates (a / b) / c. The former right-recursive
   form emitted the rightmost operator first and produced a / (b / c). */
term
     : term _TIMES_ factor
     {
	code_str += QString("\tfmulp st1, st0\n");
     }
     | term _DIV_ factor
     {
	code_str += QString("\tfdivp st1, st0\n");
     }
     | factor {}
     ;

/* Primary expressions. Every alternative appends x87 code to code_str and,
   where a storage label is needed, a definition to data_str. */
factor
     : _OBR_ exp _CBR_  {
         /* NOTE(review): this reloads the most recently seen identifier
            after the parenthesised expression has already been generated;
            it looks unintended, confirm against the code generator. */
         code_str += QString("\tfld qword [_LC%1]\n").arg(FLastID);
     }
     | _NUM_
     {
	/* Numeric literal: emit it as a numbered data-section constant and
	   load that constant. A ".00" suffix is appended when the formatted
	   text has no decimal point. */
	temp_str  = QString("%1").arg((double)$1);
	if (temp_str.contains('.') == false)
	    temp_str += QString(".00");

	nvar_counter++;

	code_str.append(QString("\tfld qword [_LC%1]\n").arg(nvar_counter-1));
	data_str += QString("_LC%1:\tdq %2\n").arg(nvar_counter-1).arg(temp_str);

	last_result = NUMBER;
     }
     | _ROUND_ _OBR_ { assign_flag = true; } exp {
	/* NOTE(review): the value is stored to _LCtmp but the pushes read
	   the most recent numeric constant label; confirm which of the two
	   is intended. */
	code_str += QString(
		"\tfstp qword [_LCtmp]\n"
		"\tpush dword [_LC%1+4]\n"
		"\tpush dword [_LC%1]\n"
		).arg(nvar_counter-1);
     } _COMMA_ exp _CBR_ {
	code_str += QString(
		"\tfstp qword [_LCtmp]\n"
		"\tpush dword [_LC%1+4]\n"
		"\tpush dword [_LC%1]\n"
		"\tcall _round_val\n"
		).arg(nvar_counter-1);
     }
     | _ID_
     {
	/* Variable reference: load it, remember it as the last identifier,
	   and define its storage on first use. */
	last_result = 0;

         //if (do_function_call)
	code_str += QString("\tfld qword [_LC%1]\n").arg($1);

         FLastID = $1;
	stack_counter += 4;

         if (!symbol_labels.contains(QString("_LC%1").arg($1)))
         {
             data_str += QString("_LC%1: dq 0.00\n").arg($1);
             symbol_labels << QString("_LC%1").arg($1);
         }
     }
     | _ID_ _PLUS_PLUS_ {
	/* Post-increment: the first load leaves the old value as the result;
	   the second sequence computes variable + _LCtmp_1 and stores it
	   back. faddp has no memory-operand form, so the add and the
	   store-with-pop are emitted as two instructions, mirroring the
	   decrement case below. */
	code_str += QString(
		"\tfld qword [_LC%1]\n"
		"\tfld qword [_LCtmp_1]\n"
		"\tfadd qword [_LC%1]\n"
		"\tfstp qword [_LC%1]\n"
		).arg($1);
     }
     | _ID_ _MINUS_MINUS_ {
	/* Post-decrement: the reversed subtract yields
	   variable - _LCtmp_1. A plain fsub here would compute
	   _LCtmp_1 - variable, the opposite of the increment case. */
	code_str += QString(
		"\tfld qword [_LC%1]\n"
		"\tfld qword [_LCtmp_1]\n"
		"\tfsubr qword [_LC%1]\n"
		"\tfstp qword [_LC%1]\n"
		).arg($1);
     }
     | _SIN_  _OBR_ exp _CBR_ { internal_pusher("sin" ); }
     | _ASIN_ _OBR_ exp _CBR_ { internal_pusher("asin"); }
     | _ACOS_ _OBR_ exp _CBR_ { internal_pusher("acos"); }
     | _ATAN_ _OBR_ exp _CBR_ { internal_pusher("atan"); }
     | _COS_  _OBR_ exp _CBR_ { internal_pusher("cos" ); }
     | _TAN_  _OBR_ exp _CBR_ { internal_pusher("tan" ); }
     | _SQRT_ _OBR_ exp _CBR_ { internal_pusher("sqrt"); }
     | _ABS_  _OBR_ exp _CBR_ {
	/* Absolute value: fabs clears the sign, whereas fchs would flip it
	   and turn positive inputs negative. */
	code_str += QString("\tfabs\n");
     }
     ;

endif_stmt
     /* Terminator of an if statement; no action is attached. */
     : _END_IF_
       { }
     ;
%%

void yyerror(char * message)
{
     QMessageBox::about(NULL,"error",QString("%1: line
%2").arg(message).arg(lineno));
     ::yyrestart(yyin);
}


/**
 * Append the code for a built-in math function to code_str.
 *
 * @param id  Function name. "sin", "cos" and "sqrt" are emitted as a single
 *            instruction named f<id>; "asin", "acos", "atan" and "tan" are
 *            emitted as a call to _<id>. Any other name emits nothing.
 */
void internal_pusher(QString id)
{
   const bool inline_op =
       (id == "sin") || (id == "cos") || (id == "sqrt");
   const bool helper_call =
       (id == "asin") || (id == "acos") || (id == "atan") || (id == "tan");

   if (inline_op)
   {
       // Apply the instruction, then store and reload through _LC<id>.
       const QString inline_code = QString(
           "\tf%1\n"
           "\tfstp qword [_LC%1]\n"
           "\tfld qword [_LC%1]\n"
           ).arg(id);
       code_str += inline_code;
   }

   if (helper_call)
   {
       // Spill the operand to _LCtmp, push it as two dwords, call _<id>.
       // NOTE(review): two dword pushes are 8 bytes, yet esp is adjusted
       // by 12 afterwards - confirm the intended stack cleanup.
       const QString call_code = QString(
           "\tfstp qword [_LCtmp]\n"
           "\tpush dword [_LCtmp+4]\n"
           "\tpush dword [_LCtmp]\n"
           "\tcall _%1\n"
           "\tadd esp, 12\n").arg(id);
       code_str += call_code;
   }
}

cheers
Jens

Back to comp.compilers | Previous | NextPrevious in thread | Find similar


Thread

Question about parser/parsing technics spy974@gmail.com - 2012-03-06 05:21 -0800
  Re: Question about parser/parsing technics Jens Kallup <jkallup@web.de> - 2012-03-06 19:52 +0100

csiph-web