Sei sulla pagina 1 di 22

/*The driver program*/ #include <iostream> #include "string" using namespace std; #include "InputChecker.h" #include "Functions.

h" #include "Error.h" int main(int argc, char *argv[]) { InputChecker lc(argc, argv); Functions lex_func(argv[2]); Functions lex_func_2(argv[2]); Error lex_err; switch(lc.check()) { case InputChecker::SHOW_HELP: { lex_func.showHelp(); break; } case InputChecker::GEN_CHR_STRM: { lex_func.generateCharStream(argv[2]); break; } case InputChecker::GEN_SRC_STRM: { lex_func.generateSourceStream(argv[2]); break; } case InputChecker::GEN_TOK_STRM: { //lex_func.generateTokenStream(argv[2]); lex_func.generateTokenStream2(argv[2]); //lex_func.printCmd(); break; } case InputChecker::GEN_SYM_TAB: { lex_func.generateSymbolTable(argv[2]); break; } case InputChecker::NO_PARAM: { lex_err.showError(InputChecker::NO_PARAM); break; } case InputChecker::NO_SRC_FILE: { lex_err.showError(InputChecker::NO_SRC_FILE); break; } case InputChecker::INVALID_SWITCH: { lex_err.showError(InputChecker::INVALID_SWITCH);

break; } case InputChecker::INVALID_SRC_FILE: { lex_err.showError(InputChecker::INVALID_SRC_FILE); break; } } return 0; } /*CM.h CharMap*/ #include "string" class Charmap { public: Charmap(); enum CHAR_TYPE {LETTER, DIGIT, SYMBOL, STRING_DELIM, WHITESPACE, ERROR, DOT}; CHAR_TYPE getCharType(int); private: CHAR_TYPE cm[256]; }; Charmap::Charmap() { unsigned int i; for(i=0 ; i<256 ; i++) { cm[i] = ERROR; } for(i=65 ; i<=89 ; i++) { cm[i] = LETTER; } for(i=97 ; i<=122 ; i++) { cm[i] = LETTER; } for(i=48 ; i<=57 ; i++) { cm[i] = DIGIT; } for(i=33 ; i<=47 ; i++) { cm[i] = SYMBOL; } for(i=58 ; i<=63 ; i++) { cm[i] = SYMBOL; } for(i=91 ; i<=95 ; i++) { cm[i] = SYMBOL;

} for(i=123 ; i<=125 ; i++) { cm[i] = SYMBOL; } cm[32] = WHITESPACE; cm[34] = STRING_DELIM; cm[46] = DOT; } Charmap::CHAR_TYPE Charmap::getCharType(int n) { return cm[n]; } /*Error.h*/ #include "vector" class Error { public: Error(); void showError(int); private: vector<string> err_type; }; Error::Error() { err_type.push_back("\t-No parameters specified!"); err_type.push_back("\t-No source file specified!"); err_type.push_back("\t-Specified switch not found or invalid!"); err_type.push_back("\t-Invalid source file!"); } void Error::showError(int typev) { cout << "Lexer: A Hand-Coded Lexical Analyzer Program V.1.0." << endl; cout << "ERROR:" << endl; cout << err_type.at(typev) << endl; cout << "See lexer /help for help" << endl; }

/* NOTE(review): the file-name marker for this part is missing from the
   listing; the driver includes it as "Functions.h". */
#include <fstream>
#include <iostream>
#include <string>
#include "CM.h"
#include <vector>
#include <ctime>
#include "KeywordList.h"
#include "SymbolList.h"
#include "SymbolTable.h"

// Printable names, indexed by Functions::TOKEN.
const char *token_str[8] = {"WORD","NUMBER","SYMBOL","STRING","ERROR","ID","INTEGER","REAL"};

// Printable names and counters, indexed by Charmap::CHAR_TYPE. Both need
// seven slots because CHAR_TYPE has seven values (DOT is the last one).
const char *chr_str[7] = {"LETTER", "DIGIT ", "SYMBOL", "STR_DL", "WHITESPACE", "ERROR ", "DOT   "};
int chr_ctr[7] = {0};

// Counters for the five token kinds the scanner produces (WORD..ERROR).
int tok_ctr[5] = {0};

/*
 * The lexer proper: generates the source, character and token listings
 * and the symbol-table (cross-reference) listing for one source file.
 */
class Functions
{
public:
    enum TOKEN{WORD,NUMBER,SYMBOL,STRING,ERROR,ID,INTEGER,REAL};
    enum SYMBOL_CODES{PLUS};

    Functions(char *s);                     //constructor
    void generateSourceStream(char *s);     //source stream
    void generateCharStream(char *s);       //character stream
    void generateTokenStream(char *s);      //token stream
    void generateTokenStream2(char *s);     //token stream
    void generateSymbolTable(char *s);
    TOKEN getToken();
    string getLexeme();
    void printCmd();
    void showHelp();                        //help message

private:
    // Values of _comment_flag.
    enum { IN_COMMENT = 1, NOT_IN_COMMENT = 2 };

    KeywordList kw;
    SymbolList sym;
    SymbolTable sym_tab;
    TOKEN _token;                   //token type instance
    string _lexeme;                 //lexeme holder
    fstream file_input;             //file input instance
    ofstream file_output;           //file output instance
    char _look_ahead;               //look ahead
    Charmap tok_cm;                 //charmap instance
    int _comment_flag;              //comment flag
    void checkComment(char,char, int *);
    int line_count;
    int token_ctr;
    int _getToken_rotator;
    int _getLexeme_rotator;
    time_t rawtime;                 //declare rawtime as time type
    struct tm *Tm;                  //declare Tm as a pointer of type struct tm
    int _DD, _MM, _YYYY, _hh, _min, _sec;   //day, month, year, hour, minute, seconds
    vector<TOKEN> _token_list;
    vector<string> _lexeme_list;
    vector<string> special_symbol;  //vector for special symbol
    int _line_number_ctr;
    vector<int> _line_number_holder;

    // Helpers shared by the generators.
    bool openFiles(const char *src, const char *out_name);  //open source + listing
    void closeFiles();                                      //close both streams
    void writeHeader(const char *title, const char *s);     //listing banner
    void recordToken();                                     //store + print current token
    bool tokenize(char *s);                                 //scan; false on failure
    static char charAt(const string &str, int pos);         //bounds-safe character read
};

/*
 * Sets up the two-character operator list and captures the current local
 * date/time for the listing headers. The file-name argument is not used.
 */
Functions::Functions(char *)
{
    special_symbol.push_back("++");     //increment
    special_symbol.push_back("--");     //decrement
    special_symbol.push_back("==");     //equal to
    special_symbol.push_back(">=");     //greater than or equal to
    special_symbol.push_back("<=");     // less than or equal to
    special_symbol.push_back("+=");     //complement for x = x + 2
    special_symbol.push_back("-=");     //complement for x = x - 2
    special_symbol.push_back("*=");     //complement for x = x * 2
    special_symbol.push_back("/=");     //complement for x = x / 2
    special_symbol.push_back("%=");     //complement for x = x % 2

    rawtime = time(NULL);               //get calendar
    Tm = localtime(&rawtime);           //get the current calendar time
    if(Tm != NULL)
    {
        _DD = Tm->tm_mday;
        _YYYY = Tm->tm_year+1900;
        _MM = Tm->tm_mon+1;
        _hh = Tm->tm_hour;
        _min = Tm->tm_min;
        _sec = Tm->tm_sec;
    }
    else
    {
        _DD = _MM = _YYYY = _hh = _min = _sec = 0;
    }

    _token = WORD;
    _lexeme = "";
    _look_ahead = '\0';
    _comment_flag = NOT_IN_COMMENT;
    line_count = 0;
    token_ctr = 0;
    _getToken_rotator = -1;
    _getLexeme_rotator = -1;
    _line_number_ctr = 0;
}

/*
 * Opens the source file for reading and the listing file for writing.
 * Prints a message and returns false when either cannot be opened, so the
 * callers never loop on a stream that is not readable.
 */
bool Functions::openFiles(const char *src, const char *out_name)
{
    file_input.clear();     // drop eof/fail state left by an earlier pass
    file_input.open(src, ios::in);
    if(!file_input.is_open())
    {
        cout << "\nERROR: cannot open source file \"" << src << "\"" << endl;
        return false;
    }

    file_output.clear();
    file_output.open(out_name);
    if(!file_output.is_open())
    {
        file_input.close();
        cout << "\nERROR: cannot create output file \"" << out_name << "\"" << endl;
        return false;
    }
    return true;
}

/* Closes both streams. */
void Functions::closeFiles()
{
    file_input.close();
    file_output.close();
}

/* Writes the title line and the "Source File / Date/Time" line. */
void Functions::writeHeader(const char *title, const char *s)
{
    file_output << title << endl;
    file_output << "Source File: " << s << "\t Date/Time: " << _MM << "/" << _DD << "/" << _YYYY << " " << _hh << ":" << _min << ":" << _sec;
    file_output << "\n\n";
}

/* Returns str[pos], or '\0' when pos is outside the string. */
char Functions::charAt(const string &str, int pos)
{
    if(pos < 0 || pos >= static_cast<int>(str.length()))
    {
        return '\0';
    }
    return str[pos];
}

/*
 * Counts the current token, appends it (with its lexeme and line number)
 * to the token lists and prints it to the listing.
 */
void Functions::recordToken()
{
    tok_ctr[_token]++;
    token_ctr++;
    _token_list.push_back(_token);
    _lexeme_list.push_back(_lexeme);
    file_output << token_str[_token] << "(\"" << _lexeme << "\") ";
    _line_number_holder.push_back(line_count);
}

/* Writes output.lst: every source line prefixed with its line number. */
void Functions::generateSourceStream(char *s)
{
    cout << "Scanning \"" << s << "\" . . .";
    if(!openFiles(s, "output.lst"))
    {
        return;
    }

    writeHeader("Lexer: A Hand-Coded Lexical Analyzer Program v1.0", s);
    file_output << "Source Stream: " << "\n\n" ;

    int line_count = 0;
    string str;
    // Loop on getline itself: testing eof() beforehand never terminates on
    // an unreadable stream and counts a phantom line after a final newline.
    while(getline(file_input, str))
    {
        line_count++;
        file_output << line_count << ":\t" << str << endl;
    }

    file_output << "\nSummary: " << line_count << " source lines" << endl;
    closeFiles();
    cout << " done.";
}

/*
 * Writes output.chr: every non-space character with its class, ten per
 * output line, followed by per-class totals.
 */
void Functions::generateCharStream(char *s)
{
    cout << "Scanning \"" << s << "\" . . .";
    if(!openFiles(s, "output.chr"))
    {
        return;
    }

    writeHeader("Lexer: A Hand-Coded Lexical Analyzer Program v1.0", s);
    file_output << "Character Stream: " << "\n\n" ;

    for(int k = 0; k < 7; k++)
    {
        chr_ctr[k] = 0;
    }

    int line_c = 0;
    int char_c = 0;
    string str;
    while(getline(file_input, str))
    {
        line_c++;
        for(string::size_type i = 0; i < str.length(); i++)
        {
            if(str[i] == ' ')
            {
                continue;
            }
            char_c++;
            const Charmap::CHAR_TYPE charac = tok_cm.getCharType(str[i]);
            chr_ctr[charac]++;
            file_output << chr_str[charac] << "('" << str[i] << "')" << " ";
            if((char_c % 10) == 0)  //for formatting
            {
                file_output << endl;
            }
        }
    }

    file_output << "\n\nSummary: " << line_c << " source line/s ";
    file_output << char_c << " character/s" << endl;
    file_output << "\t\t" << chr_ctr[Charmap::LETTER] << " letter/s " << chr_ctr[Charmap::DIGIT] << " digit/s ";
    file_output << chr_ctr[Charmap::SYMBOL]<< " symbol/s, " << chr_ctr[Charmap::ERROR] << " error chars";
    closeFiles();
    cout << " done.";
}

/* Writes output.tok and fills the internal token, lexeme and line lists. */
void Functions::generateTokenStream(char *s)
{
    tokenize(s);
}

/*
 * Scanner behind generateTokenStream.
 *
 * Reads the source line by line, skips blanks and block comments, and
 * emits WORD, SYMBOL, ERROR, NUMBER and STRING tokens, four per output
 * line. Returns false (after closing the files) when the source cannot be
 * opened, a string literal is not closed on its line, or a comment is
 * still open at end of file.
 */
bool Functions::tokenize(char *s)
{
    cout << "Scanning \"" << s << "\" . . .";
    if(!openFiles(s, "output.tok"))
    {
        return false;
    }

    writeHeader("Lexer: A Hand-Coded Lexical Analyzer Program v1.0", s);
    file_output << "Token Stream: " << "\n\n" ;

    // Start from a clean state so that a second scan does not append to
    // the tokens of an earlier one.
    _token_list.clear();
    _lexeme_list.clear();
    _line_number_holder.clear();
    _getToken_rotator = -1;
    _getLexeme_rotator = -1;
    for(int k = 0; k < 5; k++)
    {
        tok_ctr[k] = 0;
    }
    line_count = 0;
    token_ctr = 0;
    _comment_flag = NOT_IN_COMMENT;

    int next_line_ctr = 0;
    string str;
    while(getline(file_input, str))
    {
        line_count++;
        const int len = static_cast<int>(str.length());
        int i = 0;
        while(i < len)
        {
            // May flip the comment flag and move i past a closing "*/".
            checkComment(str[i], charAt(str, i+1), &i);
            if(i >= len)
            {
                break;
            }

            if(str[i] != ' ' && _comment_flag == NOT_IN_COMMENT)
            {
                next_line_ctr++;
                _lexeme = "";

                switch(tok_cm.getCharType(str[i]))
                {
                    case Charmap::LETTER:   // letters and digits form a WORD
                    {
                        _token = WORD;
                        Charmap::CHAR_TYPE ct = tok_cm.getCharType(charAt(str, i));
                        while(ct == Charmap::LETTER || ct == Charmap::DIGIT)
                        {
                            _lexeme += str[i];
                            i++;
                            ct = tok_cm.getCharType(charAt(str, i));
                        }
                        i--;    // step back onto the last character consumed
                        recordToken();
                        break;
                    }

                    case Charmap::SYMBOL:   // one- or two-character symbol
                    {
                        _token = SYMBOL;
                        bool two_chars = false;
                        for(vector<string>::size_type k = 0; k < special_symbol.size(); k++)
                        {
                            if(special_symbol[k][0] == str[i] && special_symbol[k][1] == charAt(str, i+1))
                            {
                                two_chars = true;
                                break;
                            }
                        }
                        _lexeme += str[i];
                        if(two_chars)
                        {
                            _lexeme += str[i+1];
                            i++;
                        }
                        recordToken();
                        break;
                    }

                    case Charmap::ERROR:    // run of unclassified characters
                    {
                        _token = ERROR;
                        while(i < len && tok_cm.getCharType(str[i]) == Charmap::ERROR)
                        {
                            _lexeme += str[i];
                            i++;
                        }
                        i--;
                        recordToken();
                        break;
                    }

                    case Charmap::DIGIT:    // digits and dots form a NUMBER
                    {
                        _token = NUMBER;
                        Charmap::CHAR_TYPE ct = tok_cm.getCharType(charAt(str, i));
                        while(ct == Charmap::DIGIT || ct == Charmap::DOT)
                        {
                            _lexeme += str[i];
                            i++;
                            ct = tok_cm.getCharType(charAt(str, i));
                        }
                        i--;
                        recordToken();
                        break;
                    }

                    case Charmap::STRING_DELIM: // text up to the closing quote
                    {
                        _token = STRING;
                        i++;    // skip the opening quote
                        while(i < len && tok_cm.getCharType(str[i]) != Charmap::STRING_DELIM)
                        {
                            _lexeme += str[i];
                            i++;
                        }
                        if(i >= len)
                        {
                            // No closing quote on this line.
                            cout << "Unexpected end-of-file error" << endl;
                            closeFiles();
                            return false;
                        }
                        recordToken();
                        break;
                    }

                    default:
                        // A '.' that does not follow a digit produces no
                        // token (it still counts for the line formatting).
                        break;
                }

                if((next_line_ctr % 4) == 0)    //exec next line
                {
                    file_output << endl;
                }
            }//end if

            i++;
        }//end while
    }//end while

    //if EOF has been reached and closing comment not found
    if(_comment_flag == IN_COMMENT)
    {
        cout << "Unexpected end-of-file error.";
        closeFiles();
        return false;
    }

    file_output << endl;
    file_output << "\nSummary: " << line_count << " source lines, " << token_ctr << " token/s" << endl;
    file_output << "\t\t" << tok_ctr[WORD] << " word token/s, " << tok_ctr[SYMBOL] << " special symbol/s" << endl;
    file_output << "\t\t" << tok_ctr[NUMBER] << " number token/s, " << tok_ctr[STRING] << " string token/s, " << tok_ctr[ERROR] << " error token/s" << endl;
    closeFiles();
    cout << " done.";
    return true;
}

/*
 * Writes source.lst: the refined token stream. Words become keywords or
 * ID tokens, numbers become REAL or INTEGER tokens (digits after the
 * fifth are reported as a separate ERROR token), and symbols are replaced
 * by their operator names. Runs the plain token scan first.
 */
void Functions::generateTokenStream2(char *s)
{
    if(!tokenize(s))
    {
        return;
    }

    cout << "Scanning \"" << s << "\" . . .";
    if(!openFiles(s, "source.lst"))
    {
        return;
    }

    writeHeader("Lexer II: A Lexer with Cross-referencer", s);
    file_output << "Token Stream: " << "\n\n" ;

    const string::size_type max_int_digits = 5;
    int emitted = 0;

    for(vector<TOKEN>::size_type n = 0; n < _token_list.size(); n++)
    {
        const TOKEN tk = getToken();
        const string lx = getLexeme();

        switch(tk)
        {
            case WORD:
            {
                if(kw.search(lx) == KeywordList::NOT_FOUND)
                {
                    file_output << token_str[ID] << "(\"" << lx << "\") ";
                }
                else
                {
                    // A keyword is printed as its own text. The keyword
                    // code is not an index into kwlist (codes start at 2).
                    file_output << lx;
                }
                emitted++;
                break;
            }

            case STRING:
            {
                file_output << token_str[STRING] << "(\"" << lx << "\") ";
                emitted++;
                break;
            }

            case NUMBER:
            {
                if(lx.find('.') != string::npos)
                {
                    file_output << token_str[REAL] <<"(\"" << lx << "\") ";
                }
                else
                {
                    file_output << token_str[INTEGER] <<"(\"" << lx.substr(0, max_int_digits) << "\") ";
                    if(lx.length() > max_int_digits)
                    {
                        file_output << token_str[ERROR] <<"(\"" << lx.substr(max_int_digits) << "\") ";
                        emitted++;
                    }
                }
                emitted++;
                break;
            }

            case SYMBOL:
            {
                const int rv = sym.search(lx);
                if(rv == SymbolList::NOT_FOUND)
                {
                    file_output << token_str[ERROR] << "(\"" << lx << "\") ";
                }
                else
                {
                    file_output << sym.toStr(rv) << " ";
                }
                emitted++;
                break;
            }

            case ERROR:
            {
                file_output << token_str[ERROR] << "(\"" << lx << "\") ";
                emitted++;
                break;
            }

            default:
                // ID, INTEGER and REAL are never stored by the scanner.
                break;
        }

        if(((n + 1) % 4) == 0)  //exec next line
        {
            file_output << endl;
        }
    }

    file_output << "\n\nSUMMARY: " << emitted << " token/s" << endl;
    closeFiles();
    cout << " done.";
}

/*
 * Writes source.xrf: every non-keyword word with the source lines on
 * which it occurs. Runs the plain token scan first.
 */
void Functions::generateSymbolTable(char *s)
{
    if(!tokenize(s))
    {
        return;
    }

    cout << "Scanning \"" << s << "\" . . .";
    if(!openFiles(s, "source.xrf"))
    {
        return;
    }

    writeHeader("Lexer II: A Lexer with Cross-referencer", s);
    file_output << "SYMBOL TABLE ENTRIES" << "\n\n" ;

    for(vector<TOKEN>::size_type n = 0; n < _token_list.size(); n++)
    {
        const TOKEN tk = getToken();
        const string lx = getLexeme();

        if(tk != WORD || kw.search(lx) != KeywordList::NOT_FOUND)
        {
            continue;   // only identifiers are cross-referenced
        }

        const SymbolTable::SYMTAB_POS rv = sym_tab.search(lx);
        if(rv == SymbolTable::NOT_FOUND)
        {
            sym_tab.insert(lx, _line_number_holder.at(n));
        }
        else
        {
            sym_tab.update(_line_number_holder.at(n), rv);
        }
    }

    sym_tab.printSymTab(file_output);
    closeFiles();
    cout << " done.";
}

/*
 * Advances the token cursor and returns the token it now points at.
 * vector::at throws std::out_of_range once the list is exhausted.
 */
Functions::TOKEN Functions::getToken()
{
    _getToken_rotator++;
    return _token_list.at(_getToken_rotator);
}

/* Same as getToken, for the lexeme list and its own cursor. */
string Functions::getLexeme()
{
    _getLexeme_rotator++;
    return _lexeme_list.at(_getLexeme_rotator);
}

/*
 * Prints tokens to the console through getToken/getLexeme, one call per
 * stored token, so both cursors must still be at their start position.
 */
void Functions::printCmd()
{
    vector<TOKEN>::size_type i = 0;
    while(i < _token_list.size())
    {
        cout << token_str[getToken()] << "(\"" << getLexeme() << "\")";
        i++;
    }
}

/*
 * Tracks block comments. c1/c2 are the current and next characters.
 * An opening marker outside a comment sets the flag; a closing marker
 * inside a comment clears it and moves *i two characters forward, past
 * the marker.
 */
void Functions::checkComment(char c1, char c2, int *i)
{
    if(_comment_flag == NOT_IN_COMMENT)
    {
        if(c1 == '/' && c2 == '*')
        {
            _comment_flag = IN_COMMENT;
        }
    }

    if(_comment_flag == IN_COMMENT)
    {
        if(c1 == '*' && c2 == '/')
        {
            _comment_flag = NOT_IN_COMMENT;
            *i = *i + 2;
        }
    }
}

/* Prints the usage text. */
void Functions::showHelp()
{
    cout << "\nLexer: A Hand-Coded Lexical Analyzer Program V.1.0." << endl;
    cout << "Programmed by: Jake B. de Guzman" << endl;
    cout << endl;
    cout << "Usage: " << "\tLEXER [/s] [/c] [/t] [/x][filename | sourcefile]";
    cout << "\nUsage Description:\n" << endl;
    cout << "\t/s" << "\toption to generate a source stream." << endl;
    cout << "\t\t" << "e.g. LEXER /s source.c\n" << endl;
    cout << "\t/c" << "\toption to generate a character stream." << endl;
    cout << "\t\t" << "e.g. LEXER /c source.c\n" << endl;
    cout << "\t/t" << "\toption to generate a token stream." << endl;
    cout << "\t\t" << "e.g. LEXER /t source.c\n" << endl;
    cout << "\t/x" << "\toption to generate a listing of the symbol table entries." << endl;
    cout << "\t\t" << "e.g. LEXER /x source.c\n" << endl;
}

/*InputChecker.h*/
#include <string>

/*
 * Classifies the command line once, at construction; check() returns the
 * stored result.
 */
class InputChecker
{
public:
    InputChecker(int argc, char *argv[]);   //constructor
    enum RETURN_TYPE{NO_PARAM, NO_SRC_FILE, INVALID_SWITCH, INVALID_SRC_FILE,
                     GEN_SRC_STRM, GEN_CHR_STRM, GEN_TOK_STRM,GEN_SYM_TAB, SHOW_HELP};
    RETURN_TYPE check(void);

private:
    RETURN_TYPE _return_val;
};

/*
 * Decision order: no arguments, too many arguments, "/help", missing
 * file name, file extension other than "c", then the switch itself.
 */
InputChecker::InputChecker(int argc, char *argv[])
{
    if(argc <= 1)
    {
        _return_val = NO_PARAM;
        return;
    }
    if(argc > 3)
    {
        _return_val = INVALID_SWITCH;
        return;
    }

    const string s = argv[1];
    if(s == "/help")
    {
        _return_val = SHOW_HELP;
        return;
    }
    if(argc == 2)
    {
        _return_val = NO_SRC_FILE;
        return;
    }

    //check if file is a c source file
    // The extension starts after the LAST dot, so "./source.c" and
    // "my.prog.c" are accepted; a name without a dot is rejected.
    const string src_file_name = argv[2];
    const string::size_type dot = src_file_name.rfind('.');
    if(dot == string::npos || src_file_name.substr(dot + 1) != "c")
    {
        _return_val = INVALID_SRC_FILE;
        return;
    }

    if(s == "/c")
    {
        _return_val = GEN_CHR_STRM;
    }
    else if(s == "/s")
    {
        _return_val = GEN_SRC_STRM;
    }
    else if(s == "/t")
    {
        _return_val = GEN_TOK_STRM;
    }
    else if(s == "/x")
    {
        _return_val = GEN_SYM_TAB;
    }
    else
    {
        _return_val = INVALID_SWITCH;
    }
}

/* Returns the classification computed by the constructor. */
InputChecker::RETURN_TYPE InputChecker::check()
{
    return _return_val;
}

//KeywordList.h
#include <vector>
#include <string>

/* Table of recognised keywords and their token codes. */
class KeywordList
{
public:
    KeywordList();  //constructor

    //the keyword's token codes
    enum KW_CODES {NOT_FOUND = 1,PRINTF,INCLUDE,MAIN,BREAK,CASE,CHAR,CONST,CONTINUE,DEFAULT,DO,DOUBLE,
                   ELSE,ENUM,EXTERN,FLOAT,FOR,GOTO,IF,INT,LONG,
                   REGISTER,RETURN,SHORT,SIGNED,SIZEOF,STATIC,STRUCT,
                   SWITCH,TYPEDEF,UNION,UNSIGNED,VOID,VOLATILE,WHILE};

    //search 4 keyword , then return
    //the equivalent token
    KW_CODES search(string);
    vector<string> kwlist;

private:
    // Declared only; no definition appears in this listing.
    int bin_search(string, int, int);
    vector<KW_CODES> kwcodes;
};

/*
 * Fills kwlist and kwcodes in parallel: entry n of one belongs to entry n
 * of the other. Note that a code is NOT an index into kwlist, because
 * the codes start at PRINTF == 2.
 */
KeywordList::KeywordList()
{
    //predefined list of keywords
    //and the corresponding token codes
    static const struct { const char *text; KW_CODES code; } table[] =
    {
        {"printf", PRINTF},     {"include", INCLUDE},   {"main", MAIN},
        {"break", BREAK},       {"case", CASE},         {"char", CHAR},
        {"const", CONST},       {"continue", CONTINUE}, {"default", DEFAULT},
        {"do", DO},             {"double", DOUBLE},     {"else", ELSE},
        {"enum", ENUM},         {"extern", EXTERN},     {"float", FLOAT},
        {"for", FOR},           {"goto", GOTO},         {"if", IF},
        {"int", INT},           {"long", LONG},         {"register", REGISTER},
        {"return", RETURN},     {"short", SHORT},       {"signed", SIGNED},
        {"sizeof", SIZEOF},     {"static", STATIC},     {"struct", STRUCT},
        {"switch", SWITCH},     {"typedef", TYPEDEF},   {"union", UNION},
        {"unsigned", UNSIGNED}, {"void", VOID},         {"volatile", VOLATILE},
        {"while", WHILE}
    };

    const unsigned int count = sizeof(table) / sizeof(table[0]);
    for(unsigned int n = 0; n < count; n++)
    {
        kwlist.push_back(table[n].text);
        kwcodes.push_back(table[n].code);
    }
}

/* Returns the code of keyword `key`, or NOT_FOUND (linear search). */
KeywordList::KW_CODES KeywordList::search(string key)
{
    for(vector<string>::size_type n = 0; n < kwlist.size(); n++)
    {
        if(kwlist[n] == key)
        {
            return kwcodes.at(n);
        }
    }
    return NOT_FOUND;
}

//SymbolList.h
#include <vector>
#include <string>

// Printable names, indexed by SymbolList::SP_SYMBOL.
const char *symbol_str[] = {"INCMNT_OP","DECMNT_OP","EQUAL_OP","GRTE_OP","LSTE_OP",
                            "ADDE_OP","MINE_OP","MULTE_OP","DIVE_OP","MODE_OP","PLUS_OP",
                            "MINUS_OP","MULT_OP","DIV_OP","ASSIGN_OP","SEMICOLON",
                            "L_PAREN", "R_PAREN","L_BRACE","R_BRACE","COMMA"};

/* Table of recognised operators/punctuation and their codes. */
class SymbolList
{
public:
    SymbolList();   //constructor

    //the keyword's token codes
    enum SP_SYMBOL {NOT_FOUND = -1, INCMNT_OP,DECMNT_OP,EQUAL_OP,GRTE_OP,LSTE_OP,
                    ADDE_OP,MINE_OP,MULTE_OP,DIVE_OP,MODE_OP,PLUS,MINUS,MULT,
                    DIV,ASSIGN,SEMICOLON,LPAREN,RPAREN,LBRACE,RBRACE,COMMA};

    //search 4 keyword , then return
    //the equivalent token
    SP_SYMBOL search(string);
    string toStr(int val);
    vector<string> sp_sym_list;

private:
    vector<SP_SYMBOL> sp_sym_codes;
};

/* Fills sp_sym_list and sp_sym_codes in parallel. */
SymbolList::SymbolList()
{
    //predefined list of keywords
    //and the corresponding token codes
    static const struct { const char *text; SP_SYMBOL code; } table[] =
    {
        {"++", INCMNT_OP},  //increment
        {"--", DECMNT_OP},  //decrement
        {"==", EQUAL_OP},   //equal to
        {">=", GRTE_OP},    //greater than or equal to
        {"<=", LSTE_OP},    // less than or equal to
        {"+=", ADDE_OP},    //complement for x = x + 2
        {"-=", MINE_OP},    //complement for x = x - 2
        {"*=", MULTE_OP},   //complement for x = x * 2
        {"/=", DIVE_OP},    //complement for x = x / 2
        {"%=", MODE_OP},    //complement for x = x % 2
        {"+", PLUS},
        {"-", MINUS},
        {"*", MULT},
        {"/", DIV},
        {"=", ASSIGN},
        {";", SEMICOLON},
        {"(", LPAREN},
        {")", RPAREN},
        {"{", LBRACE},
        {"}", RBRACE},
        {",", COMMA}
    };

    const unsigned int count = sizeof(table) / sizeof(table[0]);
    for(unsigned int n = 0; n < count; n++)
    {
        sp_sym_list.push_back(table[n].text);
        sp_sym_codes.push_back(table[n].code);
    }
}

/* Returns the code of symbol `key`, or NOT_FOUND (linear search). */
SymbolList::SP_SYMBOL SymbolList::search(string key)
{
    for(vector<SP_SYMBOL>::size_type n = 0; n < sp_sym_codes.size(); n++)
    {
        if(sp_sym_list[n] == key)
        {
            return sp_sym_codes.at(n);
        }
    }
    return NOT_FOUND;
}

/*
 * Returns the printable name for symbol code val, or an empty string
 * when val (for example NOT_FOUND) is outside the name table.
 */
string SymbolList::toStr(int val)
{
    const int count = static_cast<int>(sizeof(symbol_str) / sizeof(symbol_str[0]));
    if(val < 0 || val >= count)
    {
        return "";
    }
    return symbol_str[val];
}

/*SymbolTable.h*/
//SymbolTable.h
#include <vector>
#include <string>
#include <fstream>

/* One symbol-table row. */
struct STEntry
{
    string name;            // lexeme
    int value;              // value attribute
    int type;               //type attribute
    vector<int> occurence;  //occurence attribute
};

/* Identifier table: one entry per name, with the lines it occurs on. */
class SymbolTable
{
private:
    vector<STEntry> entry;

public:
    // A SYMTAB_POS is either NOT_FOUND or an index into the table. The
    // second enumerator only widens the enum's value range so that
    // indices can be stored in it; it is never returned.
    enum SYMTAB_POS{NOT_FOUND = -1, SYMTAB_POS_MAX = 0x7FFFFFFF};
    void update(int,SymbolTable::SYMTAB_POS);
    void printSymTab(ofstream&);
    void insert(string,int);
    SYMTAB_POS search(string);
};

//for adding new entries
void SymbolTable::insert(string s,int line_num)
{
    //setup the lexeme
    STEntry e;
    e.name = s;
    e.value = 0;
    e.type = 0;
    e.occurence.push_back(line_num);

    //add it in the symbol table
    entry.push_back(e);
}

/*
 * Records another occurrence of entry i on line_num, unless that line is
 * already the most recent one recorded. vector::at throws
 * std::out_of_range for an invalid position.
 */
void SymbolTable::update(int line_num, SymbolTable::SYMTAB_POS i)
{
    vector<int> &lines = entry.at(i).occurence;
    if(lines.empty() || lines.back() != line_num)
    {
        lines.push_back(line_num);
    }
}

//for determining whether an entry
//already exist, on exist, it returns
//the position in the list
SymbolTable::SYMTAB_POS SymbolTable::search(string s)
{
    //search, if found return pos
    //else return NOT_FOUND
    for(vector<STEntry>::size_type n = 0; n < entry.size(); n++)
    {
        if(s == entry[n].name)
        {
            return static_cast<SYMTAB_POS>(n);
        }
    }
    return NOT_FOUND;
}

/*
 * Writes the table to outs: a heading, one row per entry (name, value,
 * type, occurrence lines) and the number of identifiers.
 */
void SymbolTable::printSymTab(ofstream& outs)
{
    int id_ctr=0;
    outs << endl << "ID\tvalue\ttype\toccurence" << endl;
    for(vector<STEntry>::size_type n = 0; n < entry.size(); n++)
    {
        const STEntry &e = entry[n];
        outs << e.name << "\t";
        outs << e.value << "\t\t";
        outs << e.type << "\t\t";
        for(vector<int>::size_type k = 0; k < e.occurence.size(); k++)
        {
            outs << e.occurence[k] << " ";
        }
        outs << endl;
        id_ctr++;
    }
    outs << "\nThere are " << id_ctr << " identifiers." << endl;
    //cout << entry.at(0).occurence.at(0) << endl;
}

// ---------------------------------------------------------------------
// SAMPLE SOURCE CODE
//
// The original line breaks and blank lines of this sample were lost in
// the listing, so the layout below is approximate; the line numbers in
// SAMPLE SESSION #2 refer to the original layout.
//
//   /* source.c */
//   /*#include <stdio.h>*/
//   int main()
//   {
//   int x;
//   x = 20121.12;
//   printf("Hello World! %d", x);
//   /*hello*/x = x + 1;
//   y = x;
//   x = y + y;
//   x@@@++;
//   if(x >= 2012123)
//   {
//   printf("New value of x is %d", x);
//   }
//   return 0;
//   }@@@
//
// ---------------------------------------------------------------------
// SAMPLE SESSION #1
//
//   Lexer II: A Lexer with Cross-referencer
//   Source File: source.c    Date/Time: 3/6/2012 23:20:40
//
//   Token Stream:
//
//   intmainL_PAREN R_PAREN
//   L_BRACE intID("x") SEMICOLON
//   ID("x") ASSIGN_OP REAL("20121.12") SEMICOLON
//   printfL_PAREN STRING("Hello World! %d") COMMA
//   ID("x") R_PAREN SEMICOLON ID("x")
//   ASSIGN_OP ID("x") PLUS_OP INTEGER("1")
//   SEMICOLON ID("y") ASSIGN_OP ID("x")
//   SEMICOLON ID("x") ASSIGN_OP ID("y")
//   PLUS_OP ID("y") SEMICOLON ID("x")
//   ERROR("@@@") INCMNT_OP SEMICOLON if
//   L_PAREN ID("x") GRTE_OP INTEGER("20121") ERROR("23")
//   R_PAREN L_BRACE printfL_PAREN
//   STRING("New value of x is %d") COMMA ID("x") R_PAREN
//   SEMICOLON R_BRACE returnINTEGER("0")
//   SEMICOLON R_BRACE ERROR("@@@")
//
//   SUMMARY: 60 token/s
//
// ---------------------------------------------------------------------
// SAMPLE SESSION #2
//
//   Lexer II: A Lexer with Cross-referencer
//   Source File: source.c    Date/Time: 3/6/2012 23:20:43
//
//   SYMBOL TABLE ENTRIES
//
//   ID    value    type    occurence
//   x     0        0       7 8 10 11 12 13 14 15 17
//   y     0        0       12 13
//
//   There are 2 identifiers.
// ---------------------------------------------------------------------

Potrebbero piacerti anche