/*"Mini Search Engine" - Data Structures and Algorithms
Caleb Smith - Due 8/2/2013 */
#include <iostream> #include <sstream> #include <fstream> #include <string> #include "hash.h" #include <dirent.h> using namespace std; int main(){ /*Main Function- Builds HashTable class for storing words. Reads words from documents. Stores words in HashTable Accepts queries - displays documents containing specified words.*/ stringstream stream; HashTable hashT; char buffer[100]; char key; string word; string order; int select = 0; int check = 0; Node* queries[3] = {0, 0, 0}; Node* temp = 0; Node* wordLoc; cout << endl; /*New input stuff.*/ ifstream fin; string dir, filepath; int doc; DIR *dp; struct dirent *dirp; /*Reading words from documents begins here. Opens documents directory, reads for files one-by-one. Opens file, reads file line-by-line (skipping formatting lines beginning with '<' character), reads individual words from line. (As discussed in class, replaced any mid-word punctuation (except '-') with spaces, to clean up words.) Stores words in hash table using HashTable class' hash() function.*/ dp = opendir("documents"); if(dp == NULL){ cout << endl << "Error - No documents folder; cannot store words ." << endl; return 0; } else{ cout << endl << "Reading from documents." << endl; } dir = "documents"; while((dirp = readdir(dp))){ /*Get file # for storage.*/ doc=((dirp->d_name[9] - 48)*1000) + ((dirp->d_name[10] - 48)*100 ); doc=doc + ((dirp->d_name[11] - 48)*10) + (dirp->d_name[12] - 48) ; /*Subtract 1; needs to correspond to array indexing, so Doc 1 will be noted as 0, and Doc 50 as 49.*/ doc = doc - 1; filepath = dir + "/" + dirp->d_name; fin.open(filepath.c_str() ); while(fin.getline(buffer, 100)){ /*Reads through document line-by-line.*/ if(buffer[0] != '<'){ /*Iteratively "cleaning" the line, removing unneeded punctuation.*/ for(int i = 0;i < 100;i++){ if(!isalpha(buffer[i])&&buffer[i]!='-'){ buffer[i] = ' '; } } stream << buffer; /*Reading words individually, storing.*/ while(stream.getline(buffer, 100, ' ')){ word = buffer; hashT.hash(buffer[0], word, doc); }; } /*Resetting stringstream for latter use.*/ stream << ""; stream.clear(); }; fin.close(); } /*File reading/word storage process ends here.*/ /*User input/actually functional, useful part of program begins here.*/ while(1 == 1){ /*Loop prompts user for command/query.*/ cout << "Input command: > "; /*Commands first read by line, then split and read by character using similar method as the above word storage (without the cleaning).*/ while(cin.getline(buffer, 100)){ ///output command, for reading input from a file ///so that the user knows which command query was ///carried out. cout<<endl<<buffer<<endl; stream << buffer; while(stream.getline(buffer, 50, ' ')){ /*Reads input word-by-word.*/ /// cout << buffer << endl; word = buffer; /*resets pointer*/ if(temp != 0){ temp = 0; } /*Program-ending user command, makes reading input from a file for testing purposes easier.*/ if(word == "QUIT"){ return 1; } /*Begins checking process for queries. Not included as separate function for simplicity.*/ if(word == "AND"){ /*If next read word is "AND" query operator.*/ stream << ""; stream.clear(); stream.getline(buffer, 50, ' '); word = buffer; /*Calls function to check if word contai ned in HashTable. Stores location of the n ode containing the word into the query res ult array.*/ queries[1] = hashT.findWord(word); /*Makes temporary new node denoting whic h documents contain both query in queries[0] AND t he previous query, i.e. queries[1]*/ temp = new Node(queries[0], queries[1], "AND"); /*Stores resultant query/node in queries [0], as basis for any succeding parts of the c urret line of input.*/ queries[0] = temp; /*Used for checking if a query was "comp lex" i.e. multi-word.*/ check = 1; } else if(word == "OR"){ /*If next read word is "OR" query operator. Functionally similar to the if(word == "AND") code.*/ stream << ""; stream.clear(); stream.getline(buffer, 50, ' '); word = buffer; /*Calls function checking if word contai ned in HashTable. Stores location of node con taining the wrd in the query result array.*/ queries[1] = hashT.findWord(word); /*Makes temporary new node denoting whic h documents contain query queries[0] OR the previo us queries[1]*/ temp = new Node(queries[0], queries[1], "OR"); /*Stores resultant query/node in queries [0], as basis for any succeding parts of the c urrent line of input.*/ queries[0] = temp; /*Used for checking if a query was "comp lex".*/ check = 1; } else if(queries[0] == 0){ /*If input is just a word, not a command.*/ /*Calls function searching for word in HashTable. Stores location of the nod e containing word in queries[0].*/ queries[0] = hashT.findWord(word); } } ///Outputting results of query. if(queries[0] != 0){ /*Will only == 0 when the queried word is contained in NO documents, meaning not stored in the HashTable.* / /*Prints all documents matching query queries[0] 's search criteria.*/ queries[0]->printDocs(); } /*else*/ if(queries[0] == 0){ /*When word is not stored anywhere in HashTable.*/ cout << "Those words are in no documents." << en dl; } if(check == 1){ /*Only == 1 when a complex query has occured. when a complex query is carried out, queries[0] ends up containing a node that is NOT part of the HashTable. As such, it can be deleted, as a memory-sav er.*/ delete queries[0]; check = 0; } /*"Resets" queries array after query is done.*/ queries[0] = 0; queries[1] = 0; queries[2] = 0; /*When done reading line, clear stream for next use.*/ stream << ""; stream.clear(); } /*Goes to beginnig of the while loop to accept another query.*/ }; ///End of while loop. ///End of program. return 1; };