Sei sulla pagina 1di 11

Compiler Project Report On Mini C++ Compiler using lex and yacc

Submitted By :-
K Sai Rohan(1602-16-733-039)

J Udaykiran(1602-16-733-307)
1. Problem statement
• Construct a mini C++ compiler.
• It should accept strictly C++ code only.
• It should report an error for any C code, even C code that a C++ compiler would accept.

2. Introduction
The C and C++ programming languages are closely related. C++ grew out of C, as it was
designed to be source-and-link compatible with C. C++ was based on C and retains a great deal
of the functionality. The C++ language provides mechanisms for mixing code that is compiled
by compatible C and C++ compilers in the same program. As a matter of fact, a C++ compiler can
compile most C code, while a C compiler cannot compile most C++ code.
The purpose of compatibility with C is so that C++ programs can have convenient access to
the billions (trillions?) of lines of existing C code in the world.
Although C and C++ code are largely compatible, there are still many incompatibilities or
conflicts between them. The conflicts can be of two types:
1. Incompatible C feature - valid as C code but not as C++ code.
2. Incompatible C++ feature - valid as C++ code but not as C code.

In this project we focus on a different domain: compatible C/C++ features, i.e. features of C
code that are also valid in C++.
We aim to detect such snippets of C code in the input program and report an error if any are
found; if no C code is detected, we check the program for minor errors, i.e. we act as a
mini compiler strictly for C++.
An example of C code valid in C++ code:

#include <iostream>
#include <cstdio>
#include <cmath>

using namespace std;

int main()
{
int x;

// C++ style IO
cout << “Hello World, Enter a number: ” << endl;
cin >> x;
printf(“Hello World, Enter a number: ”);
scanf(“%d”, &x);

/* A fuction call invalid in C++ *


* if not including C header files. */
x = sqrt(4);

return 0;
}

In addition to detecting C code inside C++ code, our Mini C++ compiler will also be able to report
the following errors to the user:
• Invalid variable name.
• Invalid basic arithmetic expression.
• Syntax error in While loop.
• Syntax errors in For loop.
• Syntax errors in If-Then-Else.

3. Tools used
We have used the following four tools to implement our project:
1. YACC - generates a parser
2. LEX - generates lexical analyzers
3. GCC - GNU C Compiler
4. G++ - GNU C++ Compiler

4. Methodology or Algorithm
The project is implemented in the following steps: -
1. Read the given Input.
2. Tokenize the input using Lex rules.
3. Parse using Yacc rules.
4. Run the Algorithm described below.
Algorithm:-
Goal: detect C code that a C++ compiler would normally compile successfully,
and accept small C++ programs.
Steps:
1. Detect C header files (C programs generally include .h header files).
2. Detect C language functions and keywords that are compatible with C++
compilers.
3. If (C code detected)
Then return ERROR and STOP.
Else go to step 4.
4. Check for errors such as:
o Invalid variable name.
o Invalid basic arithmetic expression.
o Syntax error in While loop.
o Syntax errors in For loop.
o Syntax errors in If-Then-Else.
Flowchart
Lex Code to identify the tokens, identifiers, keywords, operators.

%{
/*
 * Lexical analyser for the mini C++ checker.
 *
 * Turns the input into the tokens declared in the yacc grammar. Keywords,
 * operators and string literals get their own tokens; any other single
 * character is passed through as its own character code.
 */

#include <stdio.h>
#include <stdlib.h>

/*
 * Line-start flag: set to 1 by the newline rule and cleared by most other
 * rules. An identifier seen while it is still 1 is reported as CUSTOM
 * (a candidate class/object name) instead of ID.
 */
int fvar = 1;
%}
alpha [a-zA-Z]
digit [0-9]
%%
\"[^"\n]*\" {
    /* Any single-line double-quoted string. The previous pattern only
       allowed letters, digits and spaces, so text containing punctuation
       such as ',' or ':' could never become a COUTSTR. */
    fvar = 0;return COUTSTR; }
[ \t] { fvar = 0; }
[\n][ \t]* {
    /* New line: swallow its indentation and re-arm the line-start flag. */
    fvar = 1; yylineno = yylineno + 1;}
cout { fvar = 0; return COUT; }
endl { fvar = 0;return ENDL;}
cin { fvar = 0;return CIN;}
public|private|protected { fvar = 0;return ACCESS;}
int { fvar = 0;return INT;}
float { fvar = 0;return FLOAT;}
char { fvar = 0;return CHAR;}
void { fvar = 0;return VOID;}
double { fvar = 0;return DOUBLE;}
fn1|fn2 { fvar = 0;return FNAME;}
fn { fvar = 0;return NFNAM;}
for { fvar = 0;return FOR;}
while { fvar = 0;return WHILE;}
if { fvar = 0;return IF;}
else { fvar = 0;return ELSE;}
printf { fvar = 0;return PRINTF;}
struct { fvar = 0;return STRUCT;}
class { fvar = 0;return CLASS;}
^"#include ".+ ;
{digit}+ { fvar = 0;return NUM;}
{alpha}({alpha}|{digit})* {
    /* The semantic value is the alphabet position (0..25) of the first
       letter. Upper- and lower-case letters are both mapped into that
       range: the parser uses the value to index a 26-entry table, and the
       former "*yytext - 'a'" was negative for an upper-case first letter. */
    int first = *yytext;
    yylval = (first >= 'A' && first <= 'Z') ? first - 'A' : first - 'a';
    if (fvar) {
        /* NOTE(review): fvar is deliberately left untouched on this path,
           as in the original; confirm that is intended. */
        printf("fvar = 1 yylval = %d\n", yylval);
        return CUSTOM;
    }
    fvar = 0;
    printf("fvar = 0 ID: %s\n", yytext);
    return ID;
}
"<<" { fvar = 0;return COUTOP;}
">>" { fvar = 0;return CINOP;}
"<=" { fvar = 0;return LE;}
">=" { fvar = 0;return GE;}
"==" { fvar = 0;return EQ;}
"!=" { fvar = 0;return NE;}
">" { fvar = 0;return GT;}
"<" { fvar = 0;return LT;}
"." { fvar = 0;return DOT;}
\/\/.* ;
"/*"([^*]|\*+[^*/])*\*+"/" {
    /* One complete block comment, ending at the FIRST closing delimiter.
       The previous greedy pattern ran on to the last one in the file and
       silently discarded any code between two comments. Newlines inside
       the comment are counted so reported line numbers stay correct. */
    const char *p;
    for (p = yytext; *p != '\0'; p++) {
        if (*p == '\n') {
            yylineno = yylineno + 1;
        }
    }
}
. return yytext[0];
%%

Yacc code to check errors:

%{
/*
 * Parser for the mini C++ checker: C prologue and token declarations.
 */

#include <stdio.h>
#include <stdlib.h>
extern FILE *fp;

/* One slot per letter: className[i] is set to 1 by the ClassStmt actions and
   tested by the Type rule, using the integer value carried by ID/CUSTOM. */
int className[26] ={0};

/* The generated parser calls both functions before their definitions are
   seen (the lexer is included and yyerror is defined after the grammar), so
   declare them here instead of relying on implicit declarations. */
int yylex(void);
int yyerror(char *s);
%}
/* Built-in type keywords */
%token INT FLOAT CHAR DOUBLE VOID
/* Control-flow keywords and printf */
%token FOR WHILE
%token IF ELSE PRINTF
%token STRUCT CLASS
/* Numeric literals and identifiers */
%token NUM ID
%token INCLUDE
%token DOT
/* Stream I/O: string literal, cout, endl, "<<", cin, ">>" */
%token COUTSTR COUT ENDL COUTOP
%token CIN CINOP
/* Access specifiers, line-leading identifiers, and the method-name tokens */
%token ACCESS CUSTOM FNAME NFNAM
/* Precedence, lowest first */
%right '='
%left AND OR
%left '<' '>' LE GE EQ NE LT GT
%%
/* Whole input: a non-empty, left-recursive sequence of declarations and
   function definitions. */
start
    : Declaration
    | Function
    | start Function
    | start Declaration
    ;
/* Declaration block: one item, each form except the error case ending in ';'. */
Declaration
    : Type Assignment ';'
    | Assignment ';'
    | FunctionCall ';'
    | ArrayUsage ';'
    | Type ArrayUsage ';'
    | StructStmt ';'
    | ClassStmt ';'
    | error                     /* yacc's error token, used for recovery */
    ;

/* One formal parameter: a type followed by an identifier. */
Arg
    : Type ID
    ;
/* Assignment block: assignments, comma lists and basic arithmetic over
   identifiers and numbers. Every binary form recurses on its right operand. */
Assignment
    : ID '=' Assignment
    | ID '=' FunctionCall
    | ID '=' ArrayUsage
    | ArrayUsage '=' Assignment
    | ID ',' Assignment
    | NUM ',' Assignment
    | ID '+' Assignment
    | ID '-' Assignment
    | ID '*' Assignment
    | ID '/' Assignment
    | NUM '+' Assignment
    | NUM '-' Assignment
    | NUM '*' Assignment
    | NUM '/' Assignment
    | '\'' Assignment '\''      /* operand wrapped in single quotes */
    | '(' Assignment ')'
    | '-' '(' Assignment ')'
    | '-' NUM
    | '-' ID
    | NUM
    | ID
    ;
/* Function Call Block: a plain call with zero or one argument expression, or
   a method call written as CUSTOM '.' name '(' ')'. */
FunctionCall
    : ID '(' ')'
    | ID '(' Assignment ')'
    | CUSTOM DOT FNAME '(' ')'
    | CUSTOM DOT NFNAM '(' ')'
        {
            /* NFNAM is the lexer's token for the method name "fn", which is
               reported as unknown. Abort with YYABORT so yyparse() returns
               non-zero; the former "return 0" made the caller treat this
               error as a successful parse. */
            printf("unknown function\n");
            YYABORT;
        }
    ;

/* Array Usage: an identifier subscripted by one expression. */
ArrayUsage
    : ID '[' Assignment ']'
    ;
/* Function block: a definition with an optional parameter list, or a
   parameterless definition whose name is an FNAME token. */
Function
    : Type ID '(' ArgListOpt ')' CompoundStmt
    | Type FNAME '(' ')' CompoundStmt
    ;

/* Parameter list, possibly empty. */
ArgListOpt
    : ArgList
    | /* empty */
    ;

/* One or more comma-separated parameters. */
ArgList
    : ArgList ',' Arg
    | Arg
    ;

/* Brace-enclosed statement list. */
CompoundStmt
    : '{' StmtList '}'
    ;

/* Zero or more statements. */
StmtList
    : StmtList Stmt
    | /* empty */
    ;

/* A single statement; a lone ';' is accepted as an empty statement. */
Stmt
    : WhileStmt
    | Declaration
    | ForStmt
    | IfStmt
    | PrintFunc
    | coutstatement
    | cinstatement
    | ';'
    ;
/* Type Identifier block: a built-in type keyword, or a CUSTOM identifier
   that must already be registered in className[]. */
Type
    : INT
    | FLOAT
    | CHAR
    | DOUBLE
    | VOID
    | CUSTOM
        {
            printf("here\n");
            /* NOTE(review): $1 is the value the lexer attaches to CUSTOM and
               is used as an index into className[]; confirm the lexer keeps
               it within the table's bounds. */
            if (className[$1] == 0) {
                printf("Error: Unknown Class ID\nQuiting!");
                /* YYABORT makes yyparse() return non-zero. The former
                   "return 0" reported this error as a successful parse. */
                YYABORT;
            }
        }
    ;
/* Loop Blocks: while with either a single statement or a braced body. */
WhileStmt
    : WHILE '(' Expr ')' Stmt
    | WHILE '(' Expr ')' CompoundStmt
    ;

/* For Block: the three-part header, or a header holding a single
   expression, each followed by a single statement or a braced body. */
ForStmt
    : FOR '(' Expr ';' Expr ';' Expr ')' Stmt
    | FOR '(' Expr ';' Expr ';' Expr ')' CompoundStmt
    | FOR '(' Expr ')' Stmt
    | FOR '(' Expr ')' CompoundStmt
    ;

/* IfStmt Block: a condition followed by one Stmt. There is no ELSE or
   CompoundStmt alternative here. */
IfStmt
    : IF '(' Expr ')' Stmt
    ;
/* Struct Statement: a named struct whose body is exactly one Declaration. */
StructStmt
    : STRUCT ID '{' Declaration '}'
    ;

/* Class statement: a class, optionally with one base-class clause, whose
   body is two access-labelled sections. Each action marks the class name's
   slot in className[] and prints that slot number. */
ClassStmt
    : CLASS ID '{' ACCESS ':' start ACCESS ':' start '}'
        { className[$2] = 1; printf("value of classvariable = %d\n", $2);}
    | CLASS ID ':' ACCESS ID '{' ACCESS ':' start ACCESS ':' start '}'
        { className[$2] = 1; printf("value of classvariable = %d\n", $2); }
    ;
/* Print Function: printf with a single expression argument. */
PrintFunc
    : PRINTF '(' Expr ')' ';'
    ;

/* Expression Block: empty, a comparison of two expressions, an assignment
   expression, or an array element. */
Expr
    : /* empty */
    | Expr LE Expr
    | Expr GE Expr
    | Expr NE Expr
    | Expr EQ Expr
    | Expr GT Expr
    | Expr LT Expr
    | Assignment
    | ArrayUsage
    ;

/* cout << "string" << endl   (no terminating ';' in this rule) */
coutstatement
    : COUT COUTOP COUTSTR COUTOP ENDL
    ;

/* cin >> identifier   (no terminating ';' in this rule) */
cinstatement
    : CIN CINOP ID
    ;
%%
#include"lex.yy.c"
#include<ctype.h>
int count=0;

/*
 * Program entry point: parses the file named by the first command-line
 * argument and prints whether parsing succeeded.
 *
 * Returns EXIT_SUCCESS when yyparse() returns 0. Returns EXIT_FAILURE when no
 * file name is given, the file cannot be opened, or yyparse() returns non-zero.
 */
int main(int argc, char *argv[])
{
    int status;

    /* Without this check argv[1] is NULL and fopen(NULL, ...) is undefined. */
    if (argc < 2) {
        fprintf(stderr, "usage: %s <source-file>\n",
                argc > 0 ? argv[0] : "parser");
        return EXIT_FAILURE;
    }

    yyin = fopen(argv[1], "r");
    if (yyin == NULL) {
        /* Stop here: fclose(NULL) below would otherwise be undefined. */
        perror(argv[1]);
        return EXIT_FAILURE;
    }

    status = yyparse();
    if (status == 0) {
        printf("\nParsing complete\n");
    } else {
        printf("\nParsing failed\n");
    }

    fclose(yyin);
    return status == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}

/*
 * Error callback invoked by the generated parser.
 *
 * Prints the current line number, the parser's message `s`, and the text of
 * the token being scanned. Always returns 0.
 */
int yyerror(char *s) {
    printf("%d : %s %s\n", yylineno, s, yytext );
    return 0;
}

Potrebbero piacerti anche